I. Downloading pictures from the Internet
import functools
import os
import sys
import urllib.request


def read_excel(excel_path):
    """Return the first-column value of every row in sheet "Sheet1".

    Args:
        excel_path: Path of the .xls workbook to read.

    Returns:
        A list with one entry per row (no header row is skipped), holding
        the value found in column 1 of that row.
    """
    # Imported at call time so the stdlib-only helpers in this script remain
    # importable on machines where xlrd has not been installed.
    import xlrd

    workbook = xlrd.open_workbook(excel_path)
    sheet = workbook.sheet_by_name("Sheet1")
    img_list = [sheet.row_values(i)[0] for i in range(sheet.nrows)]  # column 1
    print("list1", img_list)
    return img_list


def get_HTML():
    """Fetch the sample gallery page and return the `requests` response."""
    # Imported at call time for the same reason as xlrd in read_excel().
    import requests

    url = 'http://www.ivsky.com/tupian/haiyangshijie/'
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
    }
    data = {'show_env': '1'}
    res_params = requests.get(url, headers=headers, params=data)
    return res_params


def _progress(filename, block_num, block_size, total_size):
    """Write a one-line, carriage-return-refreshed download progress message.

    Args:
        filename: Name shown in the message.
        block_num: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Size of the remote file in bytes; urlretrieve reports a
            non-positive value when the server does not send a length.
    """
    downloaded = block_num * block_size
    if total_size > 0:
        # The last block is usually only partly filled, so cap at 100%.
        percent = min(100.0, float(downloaded) / float(total_size) * 100.0)
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
    else:
        # Unknown length: a percentage would divide by zero or go negative.
        sys.stdout.write('\r>> Downloading %s %d bytes' % (filename, downloaded))
    sys.stdout.flush()


def get_imgName(excel_path, target_dir, url, _progress):
    """Download every picture named in the workbook into `target_dir`.

    A failed download is appended to ./download_fail.csv (one URL per line),
    reported on stdout, and skipped; the remaining pictures are still fetched.

    Args:
        excel_path: Workbook whose first column lists the picture names.
        target_dir: Local directory to save into; created if missing.
        url: Server address that each picture name is appended to.
        _progress: Callback invoked as
            _progress(name, block_num, block_size, total_size).
    """
    img_list = read_excel(excel_path)
    os.makedirs(target_dir, exist_ok=True)  # once, not once per picture
    for img in img_list:
        image_url = url + img  # file to download
        filename = os.path.join(target_dir, img)  # local file name
        try:
            # urlretrieve calls its hook with (block_num, block_size,
            # total_size) only, so the picture name is bound up front.
            urllib.request.urlretrieve(
                image_url, filename, functools.partial(_progress, img))
        except Exception as e:
            # Append mode: the log must be writable and must keep the
            # failures recorded earlier in the run.
            with open('./download_fail.csv', 'a', encoding='utf-8') as download_fail:
                download_fail.write(image_url + '\n')
            print('Error skipping: file=' + img + ' exception:', e)
            continue
    print("All data download completed!")


if __name__ == '__main__':
    excel_path = './20190330_31.xls'  # Database file directory
    target_dir = 'D:/invoice/'  # Save to local directory
    url = 'http://10.28.11.139/'  # server address
    get_imgName(excel_path, target_dir, url, _progress)
II. Downloading pictures over the local area network (intranet)
Because the pictures stored by the company are encrypted or kept as raw data rather than as image files, they are delivered base64-encoded. The data-transfer interface of the company's internal server is written in Java, so I use pyhessian here to call it.
The initial installation and configuration is also quite troublesome (the company's computers cannot connect to the Internet, so everything has to be installed offline). Various problems also come up at runtime; solutions to most of them can be found online.
2.1 Installation:
Online installation: simply run pip install python-hessian
Offline installation: download six-1.12.0-py2.py3-none-any.whl and python_hessian-1.1.0-py2.py3-none-any.whl
# Open a terminal in the directory containing the downloaded files and run, once per file:
pip install Package_name
2.2 Calling the service
# coding:utf-8
import base64
import json
import os
import time

# Directory the decoded pictures are written to.
DOWNLOAD_DIR = './download_img_20190402/'


# The downloaded data is queried from the Oracle database.
def read_excel(excel_path):
    """Return the file ids stored in column 6 of sheet "SQL_Results".

    Args:
        excel_path: Path of the .xls workbook to read.

    Returns:
        A list with the column-6 value of every row except the first
        (row 0 is skipped; presumably it is the header row).
    """
    # Imported at call time so the rest of the script remains importable on
    # machines where xlrd has not been installed.
    import xlrd

    workbook = xlrd.open_workbook(excel_path)
    sheet = workbook.sheet_by_name("SQL_Results")
    fileids = [sheet.row_values(i)[5] for i in range(1, sheet.nrows)]  # column 6
    print("list: ", fileids)
    return fileids


def download(request_params):
    """Fetch one picture through the Hessian service and save it as a .jpg.

    Args:
        request_params: JSON string with the keys 'seqNum' and 'fileid'.

    Returns:
        None after the picture has been saved. If a required key is missing,
        nothing is downloaded and a (json_body, headers) tuple describing the
        problem is returned instead.

    Raises:
        ValueError: The service response carried no 'fileData'.
    """
    j = json.loads(request_params)
    print("type(j):", type(j))
    for key in ('seqNum', 'fileid'):
        if key not in j:
            msg = 'Missing parameters:' + key
            print(msg)
            result_json = {'code': '1', 'msg': msg}
            return json.dumps(result_json), {'Content-Type': 'application/xxx'}

    fileid = j['fileid']
    url = 'http://10.x.xxx.xx:8000/hessian/xxx/xxx'
    params = {
        'systemId': 'XXXX',
        'sceneY': u'Field 1',
        'sceneE': u'Field two',
        'sceneS': u'Paragraph 3',
        'seqNum': j['seqNum'],
        'fileId': fileid
    }

    # Imported at call time so parameter validation above works even where
    # pyhessian is not installed.
    from pyhessian.client import HessianProxy

    service = HessianProxy(url)
    # This method is written by java and described in the interface document.
    result = service.downloadFile(params)
    print("return result:", result)

    fileData = result['fileData']
    if not fileData:
        # Surface the service's own status instead of an opaque TypeError
        # from b64decode(None).
        raise ValueError('no fileData for fileid %s: resCode=%s, resMsg=%s'
                         % (fileid, result['resCode'], result['resMsg']))
    img_b64decode = base64.b64decode(fileData)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)  # open() does not create it
    with open(os.path.join(DOWNLOAD_DIR, str(fileid) + '.jpg'), "wb") as f:
        f.write(img_b64decode)
    print('save successful!')


def get_base64(fileids):
    """Download every file id in turn, printing a coarse progress bar.

    Args:
        fileids: Sequence of file ids to download.

    Returns:
        The request JSON strings of the downloads that raised an exception.
    """
    scale = len(fileids)
    download_fails = []
    for i, fileid in enumerate(fileids):
        time.sleep(0.2)  # short pause between requests
        # NOTE(review): the timestamp has one-second resolution, so several
        # requests share a seqNum - confirm the service accepts that.
        seqNum = 'invoice_test_data_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
        request_params = json.dumps({'seqNum': seqNum, 'fileid': fileid})
        print('request_params:', request_params)
        try:
            download(request_params)
        except Exception as e:  # not bare: Ctrl-C must still stop the run
            print('download failed:', request_params, 'exception:', e)
            download_fails.append(request_params)
            continue
        # Progress bar: one '#' per 100 finished items, '.' for the rest.
        a = '#' * int(i / 100)
        b = '.' * (int(scale / 100) - int(i / 100))
        c = (i / scale) * 100
        print("{:^3.2f}%[{}->{}]".format(c, a, b))
    return download_fails


if __name__ == '__main__':
    excel_path = './20190401_company_product_data.xls'
    fileids = read_excel(excel_path)
    download_fails = get_base64(fileids)
    # Text mode, one failed request per line: each entry is already a
    # complete JSON string, so it is written as-is.
    with open('./download_fail.txt', 'w', encoding='utf-8') as ff:
        for download_fail in download_fails:
            ff.write(download_fail + '\n')
You could use tqdm directly for the progress bar; I did not bother to download it, so the script above prints a simple one by hand.
About the call example: https://blog.csdn.net/sinat_41898105/article/details/83818446
About calling methods: https://blog.csdn.net/wuchenlhy/article/details/79207355