Office automation skills commonly used in Python

It's really tiring to go to work. I want to go to work efficiently and get off work quickly every day. So I sorted out eight office automation skills for you to improve office efficiency and go home with your wife after work. ( https://jq.qq.com/?_wv=1027&k=I2wOae3q )Ask if you have any questions!

1, Word document doc to docx

1.1 import Kit

import os
from win32com import client as wc

1.2 get the details of all doc files under the folder

# route
path="C:/Users/yyz/Desktop/python Office skills/data/doc turn docx/"   # Modify according to your own computer files

# Define an empty list and store the absolute path of the file
files = []
for file in os.listdir(path):
    if file.endswith(".doc"):
        files.append(path+file)
files

1.3 conversion documents

# Run word program
word = wc.Dispatch("Word.Application")
# for loop
i = 0
for file in files:
    try:
        doc = word.Documents.Open(file)    #Open word file
        doc.SaveAs("{}x".format(file), 12)   #Save as a file with Suffix ". Docx", where parameter 12 refers to docx file
        doc.Close()  #Close the original word file
        print(file +':Successful conversion')
        i +=1
    except:
        print(file +':transformation[Unsuccessful]')   
        files.append(file)  # If there is an error reading the file, add the file name to the files list and read it again
        pass
print('Convert file%i individual'%i)    
# Exit word    
word.Quit()

2, Text address batch to latitude and longitude
At work, address to latitude and longitude will be used for map visualization or distance calculation.

2.1 import Kit

# Import Toolkit
import pandas as pd
import json
from urllib.request import urlopen, quote
import requests

2.2 definition of conversion function

# Define function
def getlnglat(address):
    url = 'http://api.map.baidu.com/geocoding/v3/'
    output = 'json'
    ak = "I applied for it myself api"   # Baidu map API, you need to apply yourself
    address = quote(address) # Since the address variable in this paper is Chinese, in order to prevent random code, it is encoded with quote first
    uri = url + '?' + 'address=' + address  + '&output=' + output + '&ak=' + ak  +'&callback=showLocation%20'+'//GET%E8%AF%B7%E6%B1%82'
    res=requests.get(uri).text
    temp = json.loads(res) # Convert string to json
    lat = temp['result']['location']['lat']
    lng = temp['result']['location']['lng']
    return lng, lat   # longitude, latitude,

2.3 address translation
2.3.1 single address translation

# Single address translation
getlnglat('Gaojing village committee of Gaobeidian District Office of Chaoyang District of Beijing Municipality')
(116.52784003604923, 39.91806508560947)

2.3.2 batch address conversion

# Read data
data = pd.read_excel('C:/Users/yyz/Desktop/python Office skills/data/Address information.xlsx')
data

data['longitude'] = ''
data['latitude'] = ''
for i in range(data.shape[0]):
    try:
        data.iloc[i,2] = getlnglat(data.iloc[i,1])[0] # Longitude converts the address of row i and column 2 (column index is 1) into longitude and latitude, and assigns longitude to row i and column 3 (column index is 2)
        data.iloc[i,3] = getlnglat(data.iloc[i,1])[1] # latitude
    except:
        pass
    #print(i)
data

3, Longitude and latitude calculation distance
Setup Toolkit

pip install geopy

3.1 import Kit

from geopy.distance import geodesic

3.2 reading data

# Read data
data = pd.read_excel('C:/Users/yyz/Desktop/python Office skills/data/Longitude and latitude calculation distance.xlsx')
data

3.3 calculation distance

# Assign latitude and longitude to the variable to simplify
wd1 = data['Latitude 1'].tolist()
jd1 = data['Longitude 1'].tolist()
wd2 = data['Latitude 2'].tolist()
jd2 = data['Longitude 2'].tolist()

lis1 = []
for i in range(len(data)):
    j= geodesic((wd1[i],jd1[i]), (wd2[i],jd2[i])).km   # Latitude longitude latitude longitude
    lis1.append(j)
    #print(i)

data['distance'] = lis1
data

4, Baidu longitude and latitude to Gaode longitude and latitude
The company has two systems, which use different coordinate systems, and sometimes need to be converted

4.1 tool kit

# Import Toolkit
import math
import pandas as pd

4.2 defining functions

# Define conversion function
def bdToGaoDe(lon,lat):
    PI = 3.14159265358979324 * 3000.0 / 180.0
    x = lon - 0.0065
    y = lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * PI)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * PI)
    lon = z * math.cos(theta)
    lat = z * math.sin(theta)
    return lon,lat

4.3 single conversion

# Single conversion
bdToGaoDe(116.512885, 39.847469)
(116.50647396357492, 39.84120409781157)

4.4 batch conversion

# Read data
data = pd.read_excel('C:/Users/yyz/Desktop/python Office skills/data/Baidu longitude and latitude to Gaode.xlsx')
data.head()

wd = data['latitude'].tolist()
jd = data['longitude'].tolist()
# Define an empty list
li1 = []
for i in range(len(data)):
    j  = bdToGaoDe(jd[i],wd[i])
    li1.append(j)
    
li1
data['longitude_re'] = [i[0] for i in li1]
data['latitude_re'] = [i[1] for i in li1]
data.head()

5, Batch consolidation of Excel files
5.1 tool kit

# Import Toolkit
import pandas as pd
import os

5.2 obtaining the list of documents

# Set file path
path = 'C:/Users/yyz/Desktop/python Office skills/data/Data merging/'
# Empty list for storing file paths
files = []
for file in os.listdir(path):
    if file.endswith(".xlsx"):
        files.append(path+file)

# view list
files

5.3 converting stored data

# Define an empty dataframe
data = pd.DataFrame()  

# Traverse all files
for file in files:
    datai = pd.read_excel(file)
    datai_len = len(datai)
    data = data.append(datai)   # Add to total data
    print('read%i Row data,Merged file%i column, name:%s'%(datai_len,len(data.columns),file.split('/')[-1]))     
    # Check whether all are read and whether the format is wrong
# Reset index    
data.reset_index(drop=True,inplace=True)

6, Batch conversion of Word files to pdf
Only the docx file can be transferred. If the doc file is transferred, an error will be reported and the toolkit will be installed

pip install docx2pdf

6.1 import Kit

# Installation kit:
#Import Toolkit
from docx2pdf import convert
import os

6.2 single conversion

# Single conversion
convert("c:/users/yyz/desktop/Rubik's cube formula.docx", "c:/users/yyz/desktop/excel note.pdf")

6.3 batch conversion

# file location
path = 'C:/Users/yyz/Desktop/python Office skills/data/word turn pdf/'
# Define an empty list to store the file list
files = []
for file in os.listdir(path):
    if file.endswith(".docx"):
        files.append(path+file)
files
for file in files:
   convert(file,file.split('.')[0]+'.pdf')
   print(file+'Successful conversion')

7, Batch reading of table data in word
Kit installation

pip install python-docx
1
7.1 import Kit

import docx
# Read word file
doc = docx.Document('C:/Users/yyz/Desktop/python Office skills/data/word information.docx')
# Gets a list of all table objects in the document
biaoges = doc.tables 

7.2 nonstandard forms

cells = biaoges[1]._cells
cells_lis = [[cell.text for cell in cells]]

import pandas as pd
import numpy as np
datai = pd.DataFrame(cells_lis)
datai = datai[[1,3,7,9,14,16,19,21]]
datai.columns = ['full name','Age','Native place','address','Work unit','Telephone','Party member or not','date of birth']
datai

7.3 specification data

# Get the first table row
rowi = len(biaoges[0].rows)
rowi
# Define empty list
lis1 = []
# The for loop gets the data of the first table
for i in range(1,rowi):  # Cycle from line 2
    lis1.append([biaoges[0].cell(i,0).text,
                 biaoges[0].cell(i,1).text,
                 biaoges[0].cell(i,2).text,
                 biaoges[0].cell(i,3).text,
                 biaoges[0].cell(i,4).text])






```python
# Create a dataframe
data1 = pd.DataFrame(lis1,columns=['date','category','quantity','Price','amount of money'])
data1

7.4 batch reading

import pandas as pd
import os
os.chdir('C:/Users/yyz/Desktop/python Office skills/data/word information/')
lis1=[]
for file in os.listdir('.'):
    if file.endswith('.docx'):
        doc = docx.Document('./'+file)
        biaoges = doc.tables
        rowi = len(biaoges[0].rows)
        for i in range(1,rowi):
            lis1.append([biaoges[0].cell(i,0).text,
                     biaoges[0].cell(i,1).text,
                     biaoges[0].cell(i,2).text,
                     biaoges[0].cell(i,3).text,
                     biaoges[0].cell(i,4).text])

# Create dataframe            
data1 = pd.DataFrame(lis1,columns=['date','category','quantity','Price','amount of money'])
data1

8, Bulk email with outlook
8.1 import Kit

import win32com.client as win32
import pandas as pd

8.2 reading data

# Read data
data1 = pd.read_excel('C:/Users/yyz/Desktop/python Send mail in bulk.xlsx',sheet_name='Send mail')
data1.fillna('',inplace=True)

8.3 sending mail

# Run outlook
outlook = win32.Dispatch("outlook.Application")   
# for loop send file
for i in range(data1.shape[0]):   
    mail = outlook.CreateItem(0)   # Create a mail object Win32 constants. olMailItem
    mail.To = data1.iloc[i,0]      #addressee
    mail.CC = data1.iloc[i,1]      #CC
    mail.Subject = data1.iloc[i,2]    #Mail subject
    mail.HTMLBody = data1.iloc[i,3]           # html format of message body
   # mail.Body = data1.iloc[i,3]              # Message body
    mail.Attachments.Add(data1.iloc[i,4])     # enclosure
    mail.Send() #send out
    i +=1
print('Send mail%i share'%i)

The code and tutorial are put on it, so I won't lose. I hope these eight tips can help you. Remember to like it! You can tell me what else you want to see in the comments area. See you in the next chapter.

Keywords: Python

Added by Jon12345 on Fri, 04 Mar 2022 11:05:50 +0200