Python crawler project - solving a slider CAPTCHA (using Bilibili as an example)

1. Target website:

Bilibili video site login page: https://passport.bilibili.com/login

2. Development environment

This project uses the following libraries:

time

random

selenium

PIL

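time and random are part of the standard library and need no installation. Install the two third-party libraries with: pip install pillow and pip install selenium (PIL is provided by the pillow package).

The code below assumes these imports: import time, import random, from io import BytesIO, from PIL import Image, from selenium import webdriver, from selenium.webdriver import ActionChains.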

3. Project process introduction

Initialization
Request the Bilibili login page and simulate entering the account and password
Bring up the captcha with the shadowed puzzle gap and capture the captcha pictures
Compare the two captcha pictures to get the offset of the captcha slider
Use the offset value to calculate the drag track
Drag the slider button along the track to pass verification and log in

5. Bilibili simulated login - initialization and simulated input of the account and password

class Bilibili(object):

	def __init__(self, user='', pwd=''):
		"""Start a Chrome browser session and store the login settings.

		:param user: account name typed into the login form (defaults to an
			empty string, as before).
		:param pwd: password typed into the login form (defaults to an empty
			string, as before).
		"""
		# Browser object used by every later step
		self.driver = webdriver.Chrome()
		# Implicit wait applied to element lookups
		self.driver.implicitly_wait(3)
		self.url = 'https://passport.bilibili.com/login'
		# User name
		self.user = user
		# Password
		self.pwd = pwd

	def close(self):
		"""Quit the browser held in ``self.driver``."""
		browser = self.driver
		browser.quit()

	def input_user_pwd(self):
		"""Open the login page and type the user name and password.

		Loads ``self.url`` and sends ``self.user`` / ``self.pwd`` to the
		elements with ids ``login-username`` and ``login-passwd``.
		"""
		# Enter the landing page
		self.driver.get(self.url)

		# find_element(by, value) replaces the find_element_by_* helpers that
		# were removed in Selenium 4.3; 'id' is the locator strategy name
		# (the value of selenium's By.ID) and also works on older releases.
		tb_user = self.driver.find_element('id', 'login-username')
		tb_user.send_keys(self.user)

		tb_pwd = self.driver.find_element('id', 'login-passwd')
		tb_pwd.send_keys(self.pwd)

6. Bring up the captcha with the shadowed puzzle gap and capture the captcha pictures

    def get_screenshot(self):
        """Take a screenshot of the browser and return it as a PIL image."""
        png_bytes = self.driver.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def update_style(self):
        '''
            //Modify the style property of the picture to display the picture without gap
        '''
        js = 'document.querySelectorAll("canvas")[3].style="display:block"'
        self.driver.execute_script(js)
        time.sleep(2)

    def get_position(self):
        '''
            //Get four sides when intercepting verification code
        '''
        #Navigate to the login button
        bt_login = self.driver.find_element_by_xpath('//a[@class="btn btn-login"]')
        #Simulated Click
        bt_login.click()
        time.sleep(2)
        #Get captcha picture object
        code_img = self.driver.find_element_by_xpath('//canvas[@class="geetest_canvas_slice geetest_absolute"]')
        time.sleep(2)

        location = code_img.location
        size = code_img.size

        #screenshot = self.get_screenshot()
        #print(screenshot.size)

        #Calculate the image capture area (coordinates of left, top, right, bottom)
        left,top,right,buttom = location['x'],location['y'],location['x']+size['width'],location['y']+size['height']
        return left,top,right,buttom


    def get_image(self):
        '''
            //Capture captcha image
        '''
        #Get verification code location
        position = self.get_position()
        #Extract the gap captcha image from the screenshot
        captcha1 = self.get_screenshot().crop(position)
        #Modify the style attribute to display a picture of the verification code without gaps
        self.update_style()
        #Pick out the picture of verification code without gap from the screenshot
        captcha2 = self.get_screenshot().crop(position)

        with open('captcha1.png','wb') as f1 ,open('captcha2.png','wb') as f2:
            captcha1.save(f1)
            captcha2.save(f2)

        return captcha1,captcha2

7. Compare the two captcha images to get the offset of the captcha slider

    def is_pixel_equal(self,img1,img2,x,y):
        '''
            //Determine whether the RGB value of the same pixel of two pictures is equal
        '''
        pixel1,pixel2= img1.load()[x,y],img2.load()[x,y]
        #print(pixel1,pixel2)
        #Set a baseline
        sub_index = 60

        #compare
        if abs(pixel1[0]-pixel2[0])< sub_index and abs(pixel1[1]-pixel2[1])< sub_index and abs(pixel1[2]-pixel2[2])< sub_index:
            return True
        else:
            return False

    def get_gap_offset(self,img1,img2):
        '''
            //Get the offset of the notch
        '''
        x = int(img1.size[0]/4.2)
        for i in range(x,img1.size[0]):
            for j in range(img1.size[1]):
                #Compare the two pictures, (i,j) RGB difference of pixel points, if it is too large, x is the offset value.
                if not self.is_pixel_equal(img1,img2,i,j):
                    x = i
                    return x
        return x

8. Use the offset value to calculate the drag movement (track)

    def get_track(self,offset):
        '''
           //Simulation human drag verification code slider
        '''
        track = []
        #Slider start x coordinate
        current = 5
        #Variable speed threshold

        border_point = int(offset*3/5)
        #set time interval
        t = 0.2
        #Set initial speed
        offset +=4
        v = 0
        #Loop until exit when sliding to offset
        while current < offset:
            #Change the motion state according to whether the critical point or not
            if current < border_point:
                #acceleration
                a = 1
            else:
                a =-0.5
            v0 = v
            v = v0 + a*t

            move = v0*t +0.5*a*t*t

            current += move

            track.append(round(move))

        return track

9. Drag the slider button along the track to pass verification and log in.

    def shake_mouse(self):
        """
        //Simulate hand release mouse shake
        :return: None
        """
        ActionChains(self.driver).move_by_offset(xoffset=-2,yoffset=0).perform()
        ActionChains(self.driver).move_by_offset(xoffset=2,yoffset=0).perform()

    def operate_slider(self,track):
        '''
           //Drag slider
        '''
        #Get drag button
        back_tracks = [-1,-1,-2,-1]
        slider_bt = self.driver.find_element_by_xpath('//div[@class="geetest_slider_button"]')

        #Click the button to drag the verification code
        ActionChains(self.driver).click_and_hold(slider_bt).perform()

        #Move forward
        for i in track:
            ActionChains(self.driver).move_by_offset(xoffset=i,yoffset=0).perform()
           #The acceleration and deceleration effect is not very good.
           #Every time a random pause of 0-1 / 100s is moved, it swindles the polar test and has a high passing rate.
            time.sleep(random.random()/100)
        time.sleep(random.random())
        #Move in reverse track
        for i in back_tracks:
            time.sleep(random.random()/100)
            ActionChains(self.driver).move_by_offset(xoffset=i,yoffset=0).perform()
        #Simulate hand shake
        self.shake_mouse()
        time.sleep(random.random())
        #Release slider button
        ActionChains(self.driver).release().perform()


    def do_captcha(self):
        '''
            //Implement processing verification code
        '''
        #Pictures with and without gaps
        img1,img2 = self.get_image()
        #Compare two captcha pictures to get the offset of captcha slider
        offset = self.get_gap_offset(img1,img2)
        print(offset)

        #Use offset value to calculate move operation
        track = self.get_track(offset)

        #Operate the slider button to simulate dragging the slider to verify login
        self.operate_slider(track)

    def login(self):
        '''
        //Implement the main login logic
        '''
        #Go to the login interface and enter the account password
        self.input_user_pwd()
        #Process verification code
        self.do_captcha()


        #Close browser
        self.close()

    def run(self):
        self.login()

# Run the crawler only when this file is executed as a script
if __name__ == '__main__':
    crawler = Bilibili()
    crawler.run()

Keywords: pip Selenium network Attribute

Added by Marsha on Sat, 26 Oct 2019 00:40:42 +0300

Programming VIP