Eight practical Python scripts worth saving for later!

Well-written scripts mean getting off work earlier! Besides writing application code, a programmer's day-to-day work inevitably involves related testing and verification tasks.

For example, suppose a website suddenly becomes unreachable. You need to determine whether the address is accessible and what the server returns before you can work out what went wrong. Writing that kind of check in a compiled language would cost far more time and effort than the task deserves; this is exactly where scripts work their magic.
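
For instance, such a reachability check takes only a few lines. Below is a minimal sketch using the third-party requests library (the URL is just a placeholder):

import requests

url = 'https://example.com'  # placeholder address to check
try:
    resp = requests.get(url, timeout=5)
    print(url, '->', resp.status_code)  # HTTP status code returned by the server
except requests.RequestException as exc:
    print(url, 'is unreachable:', exc)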

It is no exaggeration to say that the ability to write efficient, practical scripts directly affects a programmer's quality of life (and how early they get off work). Below are eight practical Python scripts; adapt them slightly where needed and put them straight to use. They are well worth keeping.

1. Fix garbled filenames when unzipping under Linux (Python 2 script).

import os
import sys
import zipfile
import argparse

s = '\x1b[%d;%dm%s\x1b[0m'  # ANSI escape template: attribute, colour, text, reset

def unzip(path):

    file = zipfile.ZipFile(path,"r")
    if args.secret:
        file.setpassword(args.secret)

    for name in file.namelist():
        try:
            # Filenames in zip archives created on Windows are usually GBK-encoded
            utf8name = name.decode('gbk')
            pathname = os.path.dirname(utf8name)
        except:
            utf8name=name
            pathname = os.path.dirname(utf8name)

        #print s % (1, 92, '  >> extracting:'), utf8name
        #pathname = os.path.dirname(utf8name)
        if not os.path.exists(pathname) and pathname != "":
            os.makedirs(pathname)
        data = file.read(name)
        if not os.path.exists(utf8name):
            try:
                fo = open(utf8name, "w")
                fo.write(data)
                fo.close()
            except:
                pass
    file.close()

def main(argv):
    ######################################################
    # for argparse
    p = argparse.ArgumentParser(description='Solve unzip Random code')
    p.add_argument('xxx', type=str, nargs='*', \
        help='Command object.')
    p.add_argument('-s', '--secret', action='store', \
        default=None, help='Password')
    global args
    args = p.parse_args(argv[1:])
    xxx = args.xxx

    for path in xxx:
        if path.endswith('.zip'):
            if os.path.exists(path):
                print s % (1, 97, '  ++ unzip:'), path
                unzip(path)
            else:
                print s % (1, 91, '  !! file doesn\'t exist.'), path
        else:
            print s % (1, 91, '  !! file isn\'t a zip file.'), path

if __name__ == '__main__':
    argv = sys.argv
    main(argv)
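
Note that the script above is Python 2. On Python 3, zipfile decodes member names that are not flagged as UTF-8 using CP437, so the usual fix for GBK archives is to re-encode and decode each name. A minimal Python 3 sketch of the same idea ('archive.zip' is a placeholder path):

import os
import zipfile

with zipfile.ZipFile('archive.zip') as zf:  # placeholder archive path
    for info in zf.infolist():
        name = info.filename
        # Flag bit 0x800 marks UTF-8 names; everything else was decoded as CP437.
        if not (info.flag_bits & 0x800):
            try:
                name = name.encode('cp437').decode('gbk')
            except UnicodeError:
                pass  # keep the original name if it is not valid GBK
        if name.endswith('/'):
            os.makedirs(name, exist_ok=True)
        else:
            os.makedirs(os.path.dirname(name) or '.', exist_ok=True)
            with zf.open(info) as src, open(name, 'wb') as dst:
                dst.write(src.read())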

2. Count the number of lines of code under the current root directory.

# coding=utf-8
import os
import time
# Setting the root directory
basedir = './'
filelists = []
# Specify the file type you want to count
whitelist = ['cpp', 'h']
# Walk the directory tree recursively and collect every matching file
def getFile(basedir):
    global filelists
    for parent,dirnames,filenames in os.walk(basedir):
        for filename in filenames:
            ext = filename.split('.')[-1]
            #Only the specified file type is counted, and some log and cache files are omitted.
            if ext in whitelist:
                filelists.append(os.path.join(parent,filename))
# Count the non-empty lines in a single file
def countLine(fname):
    count = 0
    # Read the file in binary mode to avoid decoding errors
    with open(fname, 'rb') as f:
        for file_line in f:
            if file_line.strip():  # Filter out blank lines
                count += 1
    print(fname + '----', count)
    return count
if __name__ == '__main__' :
    startTime = time.perf_counter()  # time.clock() was removed in Python 3.8
    getFile(basedir)
    totalline = 0
    for filelist in filelists:
        totalline = totalline + countLine(filelist)
    print ('total lines:',totalline)
    print('Done! Cost Time: %0.2f second' % (time.perf_counter() - startTime))
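
On Python 3 the same walk-and-count can also be written compactly with pathlib. A minimal sketch (same file-type whitelist as above, and blank lines are skipped in the same way):

from pathlib import Path

whitelist = ('.cpp', '.h')
total = sum(
    sum(1 for line in path.read_bytes().splitlines() if line.strip())
    for path in Path('.').rglob('*')
    if path.is_file() and path.suffix in whitelist
)
print('total lines:', total)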

3. Scan a given directory and all of its subdirectories and display the total size.

import os
import sys      
try:
    directory = sys.argv[1]   
except IndexError:
    sys.exit("Must provide an argument.")

dir_size = 0   
fsizedicr = {'Bytes': 1,
             'Kilobytes': float(1) / 1024,
             'Megabytes': float(1) / (1024 * 1024),
             'Gigabytes': float(1) / (1024 * 1024 * 1024)}
for (path, dirs, files) in os.walk(directory):      
    for file in files:                              
        filename = os.path.join(path, file)
        dir_size += os.path.getsize(filename)       

fsizeList = [str(round(fsizedicr[key] * dir_size, 2)) + " " + key for key in fsizedicr] 

if dir_size == 0: print ("File Empty") 
else:
  for units in sorted(fsizeList)[::-1]: 
      print ("Folder Size: " + units)

4. Move all files older than a given number of days (240 by default) from a source directory to a target directory.

import shutil
import sys
import time
import os
import argparse

usage = 'python move_files_over_x_days.py -src [SRC] -dst [DST] -days [DAYS]'
description = 'Move files from src to dst if they are older than a certain number of days.  Default is 240 days'

args_parser = argparse.ArgumentParser(usage=usage, description=description)
args_parser.add_argument('-src', '--src', type=str, nargs='?', default='.', help='(OPTIONAL) Directory where files will be moved from. Defaults to current directory')
args_parser.add_argument('-dst', '--dst', type=str, nargs='?', required=True, help='(REQUIRED) Directory where files will be moved to.')
args_parser.add_argument('-days', '--days', type=int, nargs='?', default=240, help='(OPTIONAL) Days value specifies the minimum age of files to be moved. Default is 240.')
args = args_parser.parse_args()

if args.days < 0:
    args.days = 0

src = args.src  # Setting source directory
dst = args.dst  # Setting the target directory
days = args.days # Set up days
now = time.time()  # Get the current time

if not os.path.exists(dst):
    os.mkdir(dst)

for f in os.listdir(src):  # Traverse all entries in the source directory
    fpath = os.path.join(src, f)  # os.listdir returns bare names, so build the full path
    if os.stat(fpath).st_mtime < now - days * 86400:  # Older than the configured number of days?
        if os.path.isfile(fpath):  # Only move regular files
            shutil.move(fpath, dst)  # Move the file
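
Note that os.listdir only looks at the top level of the source directory; if old files inside subdirectories should be moved as well, the loop would need to be rebuilt around os.walk.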

5. Compress the .log files in a log directory into dated zip archives and move them to an archive folder.

import os                                                                    
import shutil                                                                
from time import strftime                                                

logsdir="c:\logs\puttylogs"                                            
zipdir="c:\logs\puttylogs\zipped_logs"                            
zip_program="zip.exe"                                                

for files in os.listdir(logsdir):                                        
    if files.endswith(".log"):                                        
        files1=files+"."+strftime("%Y-%m-%d")+".zip"        
        os.chdir(logsdir)                                                 
        os.system(zip_program + " " +  files1 +" "+ files)    
        shutil.move(files1, zipdir)                                     
        os.remove(files)                                                    
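
The dependency on an external zip.exe can be avoided entirely with the standard-library zipfile module. A rough Python 3 sketch of the same idea, reusing the directory assumptions above (both directories are assumed to exist):

import os
import zipfile
from time import strftime

logsdir = r"c:\logs\puttylogs"
zipdir = r"c:\logs\puttylogs\zipped_logs"

for name in os.listdir(logsdir):
    if not name.endswith(".log"):
        continue
    src = os.path.join(logsdir, name)
    dst = os.path.join(zipdir, name + "." + strftime("%Y-%m-%d") + ".zip")
    # Write the log into a compressed archive, then delete the original.
    with zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(src, arcname=name)
    os.remove(src)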

6. Download the LeetCode algorithm problems.

import sys
import re
import os
import argparse
import requests
from lxml import html as lxml_html

try:
    import html
except ImportError:
    import HTMLParser
    html = HTMLParser.HTMLParser()

try:
    import cPickle as pk
except ImportError:
    import pickle as pk

class LeetcodeProblems(object):
    def get_problems_info(self):
        leetcode_url = 'https://leetcode.com/problemset/algorithms'
        res = requests.get(leetcode_url)
        if not res.ok:
            print('request error')
            sys.exit()
        cm = res.text
        cmt = cm.split('tbody>')[-2]
        indexs = re.findall(r'<td>(\d+)</td>', cmt)
        problem_urls = ['https://leetcode.com' + url \
                        for url in re.findall(
                            r'<a href="(/problems/.+?)"', cmt)]
        levels = re.findall(r"<td value='\d*'>(.+?)</td>", cmt)
        tinfos = zip(indexs, levels, problem_urls)
        assert (len(indexs) == len(problem_urls) == len(levels))
        infos = []
        for info in tinfos:
            res = requests.get(info[-1])
            if not res.ok:
                print('request error')
                sys.exit()
            tree = lxml_html.fromstring(res.text)
            title = tree.xpath('//meta[@property="og:title"]/@content')[0]
            description = tree.xpath('//meta[@property="description"]/@content')
            if not description:
                description = tree.xpath('//meta[@property="og:description"]/@content')[0]
            else:
                description = description[0]
            description = html.unescape(description.strip())
            tags = tree.xpath('//div[@id="tags"]/following::a[@class="btn btn-xs btn-primary"]/text()')
            infos.append(
                {
                    'title': title,
                    'level': info[1],
                    'index': int(info[0]),
                    'description': description,
                    'tags': tags
                }
            )

        with open('leecode_problems.pk', 'wb') as g:
            pk.dump(infos, g)
        return infos

    def to_text(self, pm_infos):
        if self.args.index:
            key = 'index'
        elif self.args.title:
            key = 'title'
        elif self.args.tag:
            key = 'tags'
        elif self.args.level:
            key = 'level'
        else:
            key = 'index'

        infos = sorted(pm_infos, key=lambda i: i[key])

        text_template = '## {index} - {title}\n' \
            '~{level}~  {tags}\n' \
            '{description}\n' + '\n' * self.args.line
        text = ''
        for info in infos:
            if self.args.rm_blank:
                info['description'] = re.sub(r'[\n\r]+', r'\n', info['description'])
            text += text_template.format(**info)

        with open('leecode problems.txt', 'w') as g:
            g.write(text)

    def run(self):
        if os.path.exists('leecode_problems.pk') and not self.args.redownload:
            with open('leecode_problems.pk', 'rb') as f:
                pm_infos = pk.load(f)
        else:
            pm_infos = self.get_problems_info()

        print('find %s problems.' % len(pm_infos))
        self.to_text(pm_infos)

def handle_args(argv):
    p = argparse.ArgumentParser(description='extract all leecode problems to location')
    p.add_argument('--index', action='store_true', help='sort by index')
    p.add_argument('--level', action='store_true', help='sort by level')
    p.add_argument('--tag', action='store_true', help='sort by tag')
    p.add_argument('--title', action='store_true', help='sort by title')
    p.add_argument('--rm_blank', action='store_true', help='remove blank')
    p.add_argument('--line', action='store', type=int, default=10, help='blank of two problems')
    p.add_argument('-r', '--redownload', action='store_true', help='redownload data')
    args = p.parse_args(argv[1:])
    return args

def main(argv):
    args = handle_args(argv)
    x = LeetcodeProblems()
    x.args = args
    x.run()

if __name__ == '__main__':
    argv = sys.argv
    main(argv)
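
For example, running the script with --level regenerates the text file sorted by difficulty, and adding -r forces the problem data to be downloaded again instead of being read from the local leecode_problems.pk cache.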

7. Convert Markdown to HTML.

import sys
import os

from bs4 import BeautifulSoup
import markdown

class MarkdownToHtml:

    headTag = '<head><meta charset="utf-8" /></head>'

    def __init__(self,cssFilePath = None):
        if cssFilePath != None:
            self.genStyle(cssFilePath)

    def genStyle(self,cssFilePath):
        with open(cssFilePath,'r') as f:
            cssString = f.read()
        self.headTag = self.headTag[:-7] + '<style type="text/css">{}</style>'.format(cssString) + self.headTag[-7:]

    def markdownToHtml(self, sourceFilePath, destinationDirectory = None, outputFileName = None):
        if not destinationDirectory:
            # If the output directory is undefined, the source file directory (note that it should be converted to an absolute path) is used as the output directory.
            destinationDirectory = os.path.dirname(os.path.abspath(sourceFilePath))
        if not outputFileName:
            # If the output file name is undefined, the input file name is used.
            outputFileName = os.path.splitext(os.path.basename(sourceFilePath))[0] + '.html'
        if destinationDirectory[-1] != '/':
            destinationDirectory += '/'
        with open(sourceFilePath,'r', encoding='utf8') as f:
            markdownText = f.read()
        # Compile the original HTML text
        rawHtml = self.headTag + markdown.markdown(markdownText,output_format='html5')
        # Format HTML text as a more readable format
        beautifyHtml = BeautifulSoup(rawHtml,'html5lib').prettify()
        with open(destinationDirectory + outputFileName, 'w', encoding='utf8') as f:
            f.write(beautifyHtml)

if __name__ == "__main__":
    mth = MarkdownToHtml()
    # Make a shallow copy of the command line parameter list, without the script file name
    argv = sys.argv[1:]
    # At this point argv may still contain elements other than source file paths (i.e. option flags and their values)
    # By the time the program loops over argv to convert the Markdown files, only source file paths may remain in the list
    outputDirectory = None
    if '-s' in argv:
        cssArgIndex = argv.index('-s') +1
        cssFilePath = argv[cssArgIndex]
        # Check whether the style sheet file path is valid
        if not os.path.isfile(cssFilePath):
            print('Invalid Path: '+cssFilePath)
            sys.exit()
        mth.genStyle(cssFilePath)
        # The pop order matters: remove the value first, then the option flag
        argv.pop(cssArgIndex)
        argv.pop(cssArgIndex-1)
    if '-o' in argv:
        dirArgIndex = argv.index('-o') +1
        outputDirectory = argv[dirArgIndex]
        # Check whether the output directory is valid
        if not os.path.isdir(outputDirectory):
            print('Invalid Directory: ' + outputDirectory)
            sys.exit()
        # The pop order matters: remove the value first, then the option flag
        argv.pop(dirArgIndex)
        argv.pop(dirArgIndex-1)
    # So far, the elements in the list argv are all source file paths
    # Traverse all source file paths
    for filePath in argv:
        # Determine whether the file path is valid
        if os.path.isfile(filePath):
            mth.markdownToHtml(filePath, outputDirectory)
        else:
            print('Invalid Path: ' + filePath)
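
A programmatic use of the class above might look like the following sketch (the file names are placeholders, and the output directory is assumed to exist):

mth = MarkdownToHtml('style.css')  # optional stylesheet, inlined into <head>
mth.markdownToHtml('notes.md')     # writes notes.html next to notes.md
mth.markdownToHtml('notes.md', destinationDirectory='out', outputFileName='index.html')

From the command line, assuming the script is saved as markdown2html.py, the equivalent call would be: python markdown2html.py notes.md -s style.css -o out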

8. Text file encoding detection and conversion.

import sys
import os
import argparse
from chardet.universaldetector import UniversalDetector

parser = argparse.ArgumentParser(description = 'Text File Coding Detection and Conversion')
parser.add_argument('filePaths', nargs = '+',
                   help = 'Detection or conversion of file paths')
parser.add_argument('-e', '--encoding', nargs = '?', const = 'UTF-8',
                   help = '''
Target encoding. Supported encodings include:
ASCII, UTF-8 (default, with or without a BOM), UTF-16 (with a BOM),
UTF-32 (with a BOM), Big5, GB2312/GB18030, EUC-TW, HZ-GB-2312, ISO-2022-CN, EUC-JP, SHIFT_JIS, ISO-2022-JP,
ISO-2022-KR, KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251, ISO-8859-2, windows-1250, EUC-KR,
ISO-8859-1, windows-1252, ISO-8859-7, windows-1253, ISO-8859-8, windows-1255, TIS-620
''')
parser.add_argument('-o', '--output',
                   help = 'Output directory')
# Parse the parameters to get a Namespace object
args = parser.parse_args()
# If the output directory is not empty, the conversion is considered to be open. If the conversion code is not specified, the default is UTF-8.
if args.output != None:
    if not args.encoding:
        # Default encoding UTF-8
        args.encoding = 'UTF-8'
    # Check whether the output directory provided by the user is valid
    if not os.path.isdir(args.output):
        print('Invalid Directory: ' + args.output)
        sys.exit()
    else:
        if args.output[-1] != '/':
            args.output += '/'
# Instantiating a Universal Detector
detector = UniversalDetector()
print()
print('Encoding (Confidence)',':','File path')
for filePath in args.filePaths:
    # Check whether the file path is valid or not, and skip if it is invalid
    if not os.path.isfile(filePath):
        print('Invalid Path: ' + filePath)
        continue
    # Reset Detector
    detector.reset()
    # Read files in binary mode
    for each in open(filePath, 'rb'):
        # Detector reads data
        detector.feed(each)
        # Jump out of the loop if the detection is complete
        if detector.done:
            break
    # Turn off the detector
    detector.close()
    # Read results
    charEncoding = detector.result['encoding']
    confidence = detector.result['confidence']
    # Print information
    if charEncoding is None:
        charEncoding = 'Unknown'
        confidence = 0.99
    print('{} {:>12} : {}'.format(charEncoding.rjust(8),
        '('+str(confidence*100)+'%)', filePath))
    if args.encoding and charEncoding != 'Unknown' and confidence > 0.6:
        # Overwrite source files if no output directory is set
        outputPath = args.output + os.path.basename(filePath) if args.output else filePath
        with open(filePath, 'r', encoding = charEncoding, errors = 'replace') as f:
            temp = f.read()
        with open(outputPath, 'w', encoding = args.encoding, errors = 'replace') as f:
            f.write(temp)
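
For a quick one-off check, chardet also exposes a simpler top-level helper. A minimal sketch (the file path is a placeholder):

import chardet

with open('some_file.txt', 'rb') as f:  # placeholder path
    result = chardet.detect(f.read())   # e.g. {'encoding': 'GB2312', 'confidence': 0.99, ...}
print(result['encoding'], result['confidence'])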

The last two scripts are taken from the Shiyanlou course "Writing a Series of Practical Scripts with Python 3", which explains the implementation of both scripts in detail. Interested readers can head over to that course to study them.

Synced to the author's Zhihu column: https://zhuanlan.zhihu.com/p/...

Keywords: Python, encoding, Windows, Linux

Added by eyedol on Wed, 09 Oct 2019 20:01:18 +0300