Download pictures from the internet or a LAN and record the errors locally

I. Downloading pictures online

import os
import sys
import xlrd
import requests
import urllib.request

def read_excel(excel_path):
    """Collect the first-column value of every row in sheet "Sheet1".

    Opens the workbook at ``excel_path`` with xlrd, gathers cell 0 of each
    row (row 0 included), prints the resulting list and returns it.
    """
    book = xlrd.open_workbook(excel_path)
    worksheet = book.sheet_by_name("Sheet1")
    # Index 0 picks the first column out of each row's values.
    first_column = [
        worksheet.row_values(row_idx)[0]
        for row_idx in range(worksheet.nrows)
    ]
    print("list1", first_column)
    return first_column

def get_HTML():
    """Build a browser-like header dict and a parameter dict, then return ``res_params``."""
    # NOTE(review): `res_params` is never assigned in this function or anywhere
    # else in the visible code, so calling this raises NameError as written.
    # `headers` and `data` are built but never used; presumably a request call
    # that produced `res_params` was lost -- TODO confirm against the original.
    headers={"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",}
    data = {'show_env': '1'}
    return res_params

def _progress(filename, block_num, block_size, total_size):
    """Print a one-line download progress indicator to stdout.

    Args:
        filename: Name shown in the progress line.
        block_num: Number of data blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Size of the remote file in bytes; may be -1 (or 0) when
            the server does not report a size.
    """
    downloaded = block_num * block_size
    if total_size > 0:
        # The last block is usually partial, so block_num * block_size can
        # overshoot the real size; cap the display at 100%.
        percent = min(100.0, float(downloaded) / float(total_size) * 100.0)
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
    else:
        # Unknown size: a percentage cannot be computed (and would divide by
        # zero or go negative), so show the byte count instead.
        sys.stdout.write('\r>> Downloading %s %d bytes' % (filename, downloaded))
    # '\r' rewrites the same line, so flush to make each update visible.
    sys.stdout.flush()

def get_imgName(excel_path,target_dir,url,_progress):
    """Download every image named in the Excel file into ``target_dir``.

    Args:
        excel_path: Workbook passed to ``read_excel`` to obtain the image names.
        target_dir: Local directory to save into; created if it is missing.
        url: Prefix that each image name is appended to, forming its URL.
        _progress: Progress callback, invoked as
            ``_progress(filename, block_num, block_size, total_size)``.

    A failed download does not stop the run: the image name and the error
    are appended to ``./download_fail.csv`` and the loop continues.
    """
    import csv  # function-scope import: only needed for the failure log

    img_list = read_excel(excel_path)
    for img in img_list:
        image_url = url + img                       # Remote file to download
        filename = os.path.join(target_dir, img)    # Local path to save to

        # urlretrieve calls its hook with three arguments
        # (block_num, block_size, total_size); bind the file name so the
        # four-argument callback receives it as well.
        def reporthook(block_num, block_size, total_size, _name=img):
            _progress(_name, block_num, block_size, total_size)

        try:
            os.makedirs(target_dir, exist_ok=True)  # Create the directory if needed
            urllib.request.urlretrieve(image_url, filename, reporthook)
        except Exception as e:
            # Append mode so that earlier failures are kept.
            with open('./download_fail.csv', 'a', newline='', encoding='utf-8') as download_fail:
                csv.writer(download_fail).writerow([img, str(e)])
            print('Error skipping: file=' + img + ' exception:', e)
    print("All data download completed!")

if __name__ == '__main__':
    excel_path = './20190330_31.xls'                  # Excel file listing the image names
    target_dir = 'D:/invoice/'                        # Local directory to save into
    url = ''                      # Server address (left blank here; set it before running)

    # The configuration above was never used: start the download.
    get_imgName(excel_path, target_dir, url, _progress)

II. Downloading pictures over a local area network (intranet)

Because the pictures stored by the company are encrypted or stored as raw data, they are transmitted base64-encoded. The company's internal data-transfer interface is written in Java, so I use pyhessian to call it here.

The initial installation and configuration is also quite troublesome (the company's computers cannot connect to the internet, so everything must be installed offline). Various problems also come up at runtime; solutions to most of them can be found online.

2.1 installation:

Online installation: run `pip install python-hessian`
Offline installation: Download: six-1.12.0-py2.py3-none-any.whl , python_hessian-1.1.0-py2.py3-none-any.whl

# Open the terminal to execute directly in the downloaded directory
pip install Package_name
2.2 call
# coding:utf-8
import base64
import xlrd
import json
import time
from pyhessian.client import HessianProxy

# The downloaded data is queried from the Oracle database.
def read_excel(excel_path):
    """Collect the sixth-column value of each row in sheet "SQL_Results".

    Opens the workbook at ``excel_path`` with xlrd, skips row 0, gathers
    cell 5 of every remaining row, prints the list and returns it.
    """
    book = xlrd.open_workbook(excel_path)
    results_sheet = book.sheet_by_name("SQL_Results")
    # Start at row index 1 (row 0 is presumably a header -- verify against
    # the exported file); index 5 is the sixth column.
    ids = [
        results_sheet.row_values(row_idx)[5]
        for row_idx in range(1, results_sheet.nrows)
    ]
    print("list: ", ids)
    return ids

def download(request_params):
    """Fetch one file through the Hessian service and save it as a .jpg.

    Args:
        request_params: JSON string encoding an object with the keys
            ``seqNum`` and ``fileid``.

    Returns:
        If a required key is missing, a ``(body, headers)`` tuple where
        ``body`` is a JSON error string and ``headers`` is a dict.
        Otherwise ``None``, after the decoded file has been written to
        ``./download_img_20190402/<fileid>.jpg``.

    Raises:
        ValueError: If the service response contains no ``fileData``.
    """
    import os  # function-scope import: only needed to create the output directory

    j = json.loads(request_params)

    # Validate in the original order: seqNum first, then fileid.
    for required in ('seqNum', 'fileid'):
        if required not in j:
            msg = 'Missing parameters:' + required
            print(msg)
            result_json = {'code': '1', 'msg': msg}
            return json.dumps(result_json), {'Content-Type': 'application/xxx'}

    systemId = 'XXXX'
    seqNum = j['seqNum']
    sceneY = u'Field 1'
    sceneE = u'Field two'
    sceneS = u'Paragraph 3'
    fileid = j['fileid']
    url = ''  # Hessian service endpoint (left blank here; set it before running)
    params = {
        'systemId': systemId,
        'sceneY': sceneY,
        'sceneE': sceneE,
        'sceneS': sceneS,
        'seqNum': seqNum,
        'fileId': fileid,
    }

    service = HessianProxy(url)
    result = service.downloadFile(params)  # Remote method implemented in Java; see the interface document.
    print("return result:", result)

    resCode = result['resCode']
    resMsg = result['resMsg']
    fileData = result['fileData']
    if not fileData:
        # Without a payload there is nothing to decode; report the service's
        # own status instead of failing inside b64decode or writing an empty file.
        raise ValueError('No fileData for fileid=%s (resCode=%s, resMsg=%s)'
                         % (fileid, resCode, resMsg))

    img_b64decode = base64.b64decode(fileData)
    out_dir = './download_img_20190402/'
    os.makedirs(out_dir, exist_ok=True)  # open() does not create missing directories
    with open(out_dir + str(fileid) + '.jpg', "wb") as f:
        f.write(img_b64decode)  # The original opened the file but never wrote the data
    print('save successful!')

def get_base64(fileids):
    """Download every file id in turn and collect the ones that fail.

    Args:
        fileids: Sequence of file ids to request through ``download``.

    Returns:
        A list with one ``[fileid, error_message]`` pair (both ``str``) for
        each id whose download raised an exception.
    """
    scale = len(fileids)
    download_fails = []
    for i, fileid in enumerate(fileids):
        # Text progress bar: one '#' per 100 finished items, '.' for the rest.
        a = '#' * (i // 100)
        b = '.' * (scale // 100 - i // 100)
        c = (i / scale) * 100
        time.sleep(0.2)  # Brief pause between requests

        seqNum = 'invoice_test_data_' + str(time.strftime("%Y%m%d%H%M%S", time.localtime()))
        request_params = json.dumps({'seqNum': seqNum, 'fileid': fileid})
        print('request_params:', request_params)
        try:
            download(request_params)
            print("{:^3.2f}%[{}->{}]".format(c, a, b))  # Progress bar
        except Exception as e:
            # Keep going: record the failure so the caller can write it out.
            download_fails.append([str(fileid), str(e)])
            print('Error skipping: fileid=' + str(fileid) + ' exception:', e)
    return download_fails

if __name__ == '__main__':
    excel_path = './20190401_company_product_data.xls'
    fileids = read_excel(excel_path)
    download_fails = get_base64(fileids)

    # Text mode: the joined rows are str, and writing str to a file opened
    # with 'wb' raises TypeError on Python 3.
    with open('./download_fail.txt', 'w', encoding='utf-8') as ff:
        for download_fail in download_fails:
            # str() each field so that non-string entries cannot break join().
            ff.write(','.join(map(str, download_fail)) + '\n')

You could use tqdm directly for the progress bar; I did not bother to install it.
About the call example:
About calling methods:

Keywords: JSON Database network Java

Added by Haggen on Tue, 03 Dec 2019 08:53:11 +0200