使用 openai-whisper 从多媒体文件中提取文本（Windows GPU 版本）

#openai #python #tutorial #ai

GPU 型号

NVIDIA GeForce GTX 750 Ti

依赖

Python
第三方包

# Remove any previously installed torch/torchvision/torchaudio packages
# (-y skips the confirmation prompt).
pip uninstall torch torchvision torchaudio -y
# Reinstall the same three packages from the PyTorch "cu118" wheel index
# (the index path suggests CUDA 11.8 builds — confirm it matches your driver).
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Install openai-whisper, upgrading it if it is already present (-U).
pip install -U openai-whisper

echo "智能批量转录：自动选择最大可用 Whisper 模型"

# Method 1: ask nvidia-smi for the total VRAM (intended to work from Git Bash).
#
# detect_vram_nvidia_smi
#   Outputs: the memory.total value of the first GPU listed by nvidia-smi
#            (MiB) on stdout; prints 0 when nvidia-smi is not on PATH or
#            its first output line is not a plain non-negative integer.
#   Returns: always 0.
detect_vram_nvidia_smi() {
    local mem_line=""
    if command -v nvidia-smi > /dev/null 2>&1; then
        # Only the first GPU is considered (head -1).
        mem_line=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -1)
        # On Windows the tool's output can end in CR LF; command substitution
        # strips only the LF, and the leftover CR would make the numeric
        # check below fail. Drop every whitespace character before checking.
        mem_line=${mem_line//[[:space:]]/}
    fi

    if [[ $mem_line =~ ^[0-9]+$ ]]; then
        printf '%s\n' "$mem_line"
    else
        printf '0\n'
    fi
}

TOTAL_MEM=$(detect_vram_nvidia_smi)

# Method 2: if nvidia-smi gave no usable value, fall back to asking PyTorch.
#
# detect_vram_torch
#   Outputs: total memory of CUDA device 0 in MiB (bytes // 1024 // 1024) on
#            stdout; prints 0 when python is not on PATH, the snippet exits
#            non-zero, CUDA is unavailable, or the output is not a plain
#            non-negative integer.
#   Returns: always 0.
detect_vram_torch() {
    local mem_py=""
    if command -v python > /dev/null 2>&1; then
        # stderr is discarded on purpose: a failing snippet simply means
        # "no value", which is reported as 0.
        mem_py=$(python -c "
import torch
if torch.cuda.is_available():
    print(torch.cuda.get_device_properties(0).total_memory // 1024 // 1024)
else:
    print(0)
" 2>/dev/null) || mem_py=""
        # The output may carry a trailing CR on Windows. Strip whitespace and
        # validate rather than feeding raw program output to $(( )), which
        # raises a syntax error on anything that is not an arithmetic
        # expression.
        mem_py=${mem_py//[[:space:]]/}
    fi

    if [[ $mem_py =~ ^[0-9]+$ ]]; then
        printf '%s\n' "$mem_py"
    else
        printf '0\n'
    fi
}

# Only run the fallback when no memory size has been detected so far
# (TOTAL_MEM unset, empty, or zero).
if [[ ${TOTAL_MEM:-0} =~ ^0+$ ]]; then
    TOTAL_MEM=$(detect_vram_torch)
fi

echo "检测到显存: ${TOTAL_MEM} MiB"

# select_whisper_model MEM_MIB
#   Maps a VRAM size in MiB to a Whisper model name: the more memory, the
#   larger the model.
#   Arguments: $1 - memory size in MiB; anything that is not a plain
#                   non-negative integer (including empty) is treated as 0.
#   Outputs:   one of large-v3 / medium / small / base / tiny on stdout.
#   NOTE(review): these cut-offs are well above the approximate VRAM figures
#   listed in the Whisper README — confirm the extra headroom is intended.
select_whisper_model() {
    local mem=${1:-0}
    [[ $mem =~ ^[0-9]+$ ]] || mem=0
    # Force base 10 so a value with leading zeros is not read as octal.
    mem=$((10#$mem))

    if (( mem >= 20000 )); then
        printf 'large-v3\n'
    elif (( mem >= 14000 )); then
        printf 'medium\n'
    elif (( mem >= 8000 )); then
        printf 'small\n'
    elif (( mem >= 4000 )); then
        printf 'base\n'
    else
        printf 'tiny\n'
    fi
}

MODEL=$(select_whisper_model "${TOTAL_MEM:-0}")

echo "自动选择模型: $MODEL"
echo "=================================================="

# Abort early when the whisper command is not on PATH.
if ! command -v whisper > /dev/null 2>&1; then
    # Diagnostics go to stderr so they are not mixed into captured or piped stdout.
    echo "错误：whisper 命令未找到！请先 pip install openai-whisper" >&2
    exit 1
fi

# Batch transcription.
#
# transcribe_mp3_batch
#   Runs whisper on every *.mp3 in the current directory that does not yet
#   have a same-named .txt next to it.
#   Globals: MODEL  (read)    - model name passed to --model
#            count  (written) - number of files transcribed successfully
#            failed (written) - number of files for which whisper exited non-zero
transcribe_mp3_batch() {
    local mp3 txt
    count=0
    failed=0

    for mp3 in *.mp3; do
        # With no matches the glob stays literal ("*.mp3"); skip anything
        # that is not a regular file.
        [[ -f "$mp3" ]] || continue
        txt="${mp3%.mp3}.txt"

        if [[ -f "$txt" ]]; then
            echo "跳过: $txt 已存在"
            continue
        fi

        echo "转录: $mp3 → $txt (模型: $MODEL)"
        # The ./ prefix keeps a file name that starts with '-' from being
        # parsed as a command-line option.
        if whisper "./$mp3" \
            --model "$MODEL" \
            --device cuda \
            --output_format txt \
            --verbose False \
            --output_dir .; then
            # Count only real successes so the summary is not inflated by
            # failed runs.
            count=$((count + 1))
        else
            failed=$((failed + 1))
            echo "转录失败: $mp3" >&2
        fi
    done
}

transcribe_mp3_batch

echo "=================================================="
echo "全部完成！本次新增转录 $count 个文件。"
if (( failed > 0 )); then
    echo "有 $failed 个文件转录失败。" >&2
fi

DEV Community

openai-whisper从多媒体抽取文本（windows GPU 版本）

GPU 型号

依赖

Top comments (0)