Python 自动化脚本实战:从入门到生产力
Python 是自动化领域的首选语言：语法简洁、库生态丰富、跨平台。无论是文件处理、网页抓取、API 调用还是系统管理，Python 都能搞定。
1. 文件批量处理
批量重命名文件
import os
import re
def batch_rename(folder, pattern, replacement):
    """Rename the regular files in *folder* by applying a regex substitution.

    Each file name is passed through ``re.sub(pattern, replacement, name)``;
    the file is renamed only when the result differs from the current name.

    Args:
        folder: Directory whose direct children are processed (not recursive).
        pattern: Regular expression (string or compiled pattern) to search for.
        replacement: Replacement string or function, as accepted by ``re.sub``.

    Directories and other non-file entries are left untouched. A rename whose
    target name already exists is skipped and reported instead of overwriting
    the existing entry.
    """
    regex = re.compile(pattern)
    # Sorted so that the processing order (and therefore which file wins when
    # two names collapse to the same target) is deterministic.
    for filename in sorted(os.listdir(folder)):
        old_path = os.path.join(folder, filename)
        if not os.path.isfile(old_path):
            continue
        new_name = regex.sub(replacement, filename)
        if new_name == filename:
            continue
        new_path = os.path.join(folder, new_name)
        # os.rename() silently replaces an existing file on POSIX; refuse to
        # clobber. samefile() keeps case-only renames working on
        # case-insensitive filesystems, where the "existing" target is the
        # source itself.
        if os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
            print(f"Skipped: {filename} → {new_name} (target already exists)")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_name}")
# Example: prefix names in ./docs with "2025-". The pattern ^ matches at the
# start of every name, so this is not limited to names ending in .txt.
batch_rename("./docs", r"^", "2025-")
批量转换文件格式
import csv
import json
def csv_to_json(csv_file, json_file, encoding="utf-8-sig"):
    """Convert a CSV file with a header row into a JSON array of objects.

    Args:
        csv_file: Path of the CSV file to read. The first row supplies the
            keys of every output object.
        json_file: Path of the JSON file to write (always UTF-8).
        encoding: Text encoding of the CSV file. The default ``utf-8-sig``
            reads plain UTF-8 and also strips a leading byte-order mark, which
            would otherwise become part of the first column name.

    All values are written as strings, exactly as ``csv.DictReader`` yields
    them.
    """
    # newline='' is required by the csv module so that newlines embedded in
    # quoted fields are passed through unchanged.
    with open(csv_file, 'r', encoding=encoding, newline='') as f:
        rows = list(csv.DictReader(f))
    # An explicit encoding is needed because ensure_ascii=False emits raw
    # non-ASCII characters, which the platform default codec may not support.
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(rows, f, indent=2, ensure_ascii=False)
    print(f"Converted {len(rows)} rows: {csv_file} → {json_file}")
2. 网页抓取
用 requests + BeautifulSoup 抓取
import requests
from bs4 import BeautifulSoup
def scrape_titles(url, timeout=10):
    """Fetch *url* and return the text of its h1, h2 and h3 headings.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on an unresponsive host.

    Returns:
        A list of heading strings with surrounding whitespace stripped,
        grouped by level: all h1 texts first, then h2, then h3.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    resp = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    # Fail loudly instead of scraping the headings of an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    titles = []
    for tag in ('h1', 'h2', 'h3'):
        titles.extend(elem.get_text(strip=True) for elem in soup.find_all(tag))
    return titles
用 Selenium 处理动态页面
from selenium import webdriver
from selenium.webdriver.common.by import By
def scrape_dynamic(url, selector='.result-item'):
    """Load *url* in headless Chrome and return the text of matching elements.

    Args:
        url: Address of the page to open.
        selector: CSS selector of the elements whose text is collected.

    Returns:
        A list with the ``.text`` of every element matching *selector*.

    The browser is always shut down, even when loading the page or locating
    the elements raises.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # NOTE(review): elements are read right after get() returns; content
        # rendered later by JavaScript may need an explicit wait - confirm.
        elements = driver.find_elements(By.CSS_SELECTOR, selector)
        return [e.text for e in elements]
    finally:
        # Without this, a failure above would leave a Chrome process running.
        driver.quit()
3. API 调用自动化
REST API 封装
import requests
from typing import Optional, Dict, Any
class APIClient:
    """Small JSON REST client built on a shared ``requests.Session``.

    Every request carries a bearer-token ``Authorization`` header and a JSON
    ``Content-Type`` header, is bounded by a timeout, and raises on HTTP
    error statuses.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 10.0):
        """Create the session and install the default headers.

        Args:
            base_url: Root URL that request paths are appended to.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            timeout: Seconds to wait for each response before giving up.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json'
        })

    def _url(self, path: str) -> str:
        """Join ``base_url`` and *path* with exactly one slash between them."""
        if not path:
            return self.base_url
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def _request(self, method: str, path: str, *,
                 params: Optional[Dict] = None,
                 data: Optional[Dict] = None) -> Any:
        """Send one request and return the decoded JSON body.

        Returns ``None`` when the response has no body (for example HTTP 204),
        where decoding JSON would otherwise fail.

        Raises:
            requests.HTTPError: If the response status is 4xx/5xx.
        """
        # getattr keeps instances created before `timeout` existed working.
        timeout = getattr(self, 'timeout', 10.0)
        resp = self.session.request(
            method, self._url(path), params=params, json=data, timeout=timeout,
        )
        resp.raise_for_status()
        return resp.json() if resp.content else None

    def get(self, path: str, params: Optional[Dict] = None) -> Any:
        """GET *path* with optional query *params*; return the JSON body."""
        return self._request('GET', path, params=params)

    def post(self, path: str, data: Dict) -> Any:
        """POST *data* as a JSON body to *path*; return the JSON body."""
        return self._request('POST', path, data=data)

    def put(self, path: str, data: Dict) -> Any:
        """PUT *data* as a JSON body to *path*; return the JSON body."""
        return self._request('PUT', path, data=data)
4. 系统管理
磁盘空间监控
import shutil
import smtplib
from email.mime.text import MIMEText
def check_disk_usage(threshold=80, path='/', smtp_host='localhost',
                     sender='monitor@example.com',
                     recipient='admin@example.com'):
    """Measure disk usage and e-mail an alert when it exceeds *threshold*.

    Args:
        threshold: Usage percentage above which the alert is sent.
        path: Any path on the filesystem to measure.
        smtp_host: SMTP server used to deliver the alert.
        sender: Address placed in the ``From`` header.
        recipient: Address placed in the ``To`` header.

    Returns:
        The used space as a percentage of the total (0.0 when the filesystem
        reports a total size of zero).

    Raises:
        OSError: If *path* cannot be inspected or the SMTP server cannot be
            reached (the latter only when an alert has to be sent).
    """
    usage = shutil.disk_usage(path)
    if usage.total == 0:
        # Avoid dividing by zero on filesystems that report no capacity.
        return 0.0
    percent = usage.used / usage.total * 100
    if percent > threshold:
        msg = MIMEText(f"Disk usage: {percent:.1f}%")
        msg['Subject'] = f"⚠️ Disk Alert: {percent:.1f}% used"
        msg['From'] = sender
        msg['To'] = recipient
        with smtplib.SMTP(smtp_host) as smtp:
            smtp.send_message(msg)
    return percent
进程监控
import psutil
import time
def monitor_process(name, interval=60, restart_cmd=None):
    """Poll forever for a process called *name* and react when it is missing.

    Args:
        name: Exact process name to look for.
        interval: Seconds to sleep between two checks.
        restart_cmd: Optional argument list passed to ``subprocess.Popen`` to
            start the process again. With the default ``None`` nothing is
            launched; only the message is printed.

    This function never returns.
    """
    import subprocess  # local import: only needed for the optional restart

    while True:
        # process_iter(['name']) pre-fetches the name into p.info, so reading
        # it cannot raise if the process exits between listing and inspection
        # (calling p.name() again could).
        running = any(
            p.info['name'] == name for p in psutil.process_iter(['name'])
        )
        if not running:
            print(f"Process {name} not found, restarting...")
            if restart_cmd is not None:
                subprocess.Popen(restart_cmd)
        time.sleep(interval)
5. 定时任务
用 schedule 库
import schedule
import time
def job():
    """Announce on standard output that the scheduled task is running."""
    message = "Running scheduled task..."
    print(message)
# Run the job every day at 09:00.
daily_at_nine = schedule.every().day.at("09:00")
daily_at_nine.do(job)

# Run the job every hour as well.
hourly = schedule.every().hour
hourly.do(job)

# Poll once a minute and execute whichever jobs have become due.
while True:
    schedule.run_pending()
    time.sleep(60)
总结
Python 自动化的核心优势:
- 快速开发:几行代码就能搞定复杂任务
- 丰富生态：requests、BeautifulSoup、Selenium、psutil、schedule 等
- 跨平台:Windows/macOS/Linux 都能跑
- 易于维护:代码简洁,逻辑清晰
从今天开始,把重复性工作交给 Python 吧!
Top comments (0)