DEV Community

MAK KA WAI
MAK KA WAI

Posted on

Daily Log - 17/08/2024

Since the Azure OpenAI Service quota has been increased, I tried using gpt-4o to read PDFs and images, and it worked.

import base64
import io
import os
import sys
from openai import AzureOpenAI
from PIL import Image
from pdf2image import convert_from_path
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# the Azure OpenAI settings below are read.
load_dotenv()

# Directory holding the Poppler binaries; handed to pdf2image when converting PDFs.
POPPLER_PATH = r"E:\CustomPrograms\poppler-24.07.0\Library\bin"

# Azure OpenAI settings
client = AzureOpenAI(
    api_key=os.environ['AZURE_OPENAI_API_KEY'],  # required: KeyError if unset
    api_version=os.getenv('AZURE_OPENAI_API_VERSION'),
    azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
)
# Deployment name read from AZURE_OPENAI_DEPLOYMENT; used as the `model`
# argument of chat-completion requests. (Previously this line also bound the
# same value to a stray global called `azure_endpoint`, which was misleading.)
development = os.getenv('AZURE_OPENAI_DEPLOYMENT')

def encode_image(image):
    """Return the base64 text for an image.

    Args:
        image: Either a file path (``str`` or ``os.PathLike``), whose raw
            bytes are encoded unchanged, or a ``PIL.Image.Image``, which is
            serialized to JPEG before encoding.

    Returns:
        The base64-encoded payload as a ``str``.

    Raises:
        ValueError: If ``image`` is neither a path nor a PIL image.
        OSError: If the file at the given path cannot be opened or read.
    """
    if isinstance(image, (str, os.PathLike)):  # file path
        with open(image, "rb") as image_file:
            raw = image_file.read()
    elif isinstance(image, Image.Image):  # PIL Image object
        # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...);
        # saving such an image raises, so normalise to RGB first.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        buffer = io.BytesIO()
        image.save(buffer, format='JPEG')
        raw = buffer.getvalue()
    else:
        raise ValueError("Unsupported image type")
    return base64.b64encode(raw).decode('utf-8')

def analyze_image_content(base64_image, prompt, *, max_tokens=500, mime_type="image/jpeg"):
    """Send one image plus a text prompt to the chat-completions API.

    Args:
        base64_image: Base64-encoded image payload (``str``).
        prompt: Instruction text sent alongside the image.
        max_tokens: Upper bound on the length of the reply (default 500).
        mime_type: Media type declared in the data URL (default
            ``image/jpeg``); it should match how ``base64_image`` was encoded.

    Returns:
        The text content of the first choice in the response, or ``None`` if
        the request failed for any reason (the error is printed).
    """
    # Python strings are already Unicode, so no encode/decode round trip is
    # needed before handing the prompt to the client.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}"
                },
            },
        ],
    }

    try:
        response = client.chat.completions.create(
            model=development,
            messages=[user_message],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # continue (e.g. with the remaining pages of a PDF).
        print(f"API 請求錯誤:{e}")
        return None

def analyze_image(image_path, prompt):
    """Analyze a single image file with the given prompt.

    The request built by ``analyze_image_content`` declares the payload as
    ``image/jpeg``. JPEG files are therefore sent as-is, while every other
    format (PNG, BMP, GIF, ...) is decoded and re-encoded as JPEG so the
    payload matches the declared media type.

    Args:
        image_path: Path to the image file.
        prompt: Instruction text sent alongside the image.

    Returns:
        Whatever ``analyze_image_content`` returns for the encoded image.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    _, extension = os.path.splitext(image_path)
    if extension.lower() in ('.jpg', '.jpeg'):
        # Already JPEG: avoid a lossy re-encode and send the original bytes.
        base64_image = encode_image(image_path)
    else:
        with Image.open(image_path) as img:
            # convert("RGB") drops alpha/palette data that JPEG cannot store;
            # for multi-frame files only the current (first) frame is used.
            base64_image = encode_image(img.convert("RGB"))
    return analyze_image_content(base64_image, prompt)

def analyze_pdf(pdf_path, prompt):
    """Analyze every page of a PDF with the given prompt.

    Each page is rendered to an image, encoded, and analyzed with the prompt
    suffixed by `` (Page N)``.

    Returns:
        A list with the non-empty analysis results in page order, or ``None``
        if the PDF could not be converted. Pages whose analysis came back
        empty or failed are omitted, so list positions do not necessarily
        correspond to page numbers.
    """
    try:
        pages = convert_from_path(pdf_path, poppler_path=POPPLER_PATH)
    except Exception as err:
        # Conversion problems are reported together with the configured
        # Poppler location.
        print(f"錯誤:無法處理 PDF 文件。錯誤信息:{err}")
        print("請確保已正確安裝 Poppler 並設置了正確的路徑。")
        print(f"當前設置的 Poppler 路徑:{POPPLER_PATH}")
        return None

    page_answers = []
    for page_no, page in enumerate(pages, start=1):
        encoded_page = encode_image(page)
        answer = analyze_image_content(encoded_page, f"{prompt} (Page {page_no})")
        if not answer:
            continue
        page_answers.append(answer)
    return page_answers

def analyze_document(file_path, prompt):
    """Analyze a document (PDF or image) and print the outcome.

    The file type is chosen from the extension of ``file_path``
    (case-insensitive). PDF results are printed page by page; an image result
    is printed once; any other extension prints an "unsupported type" notice.
    Nothing is returned.
    """
    image_suffixes = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == '.pdf':
        page_answers = analyze_pdf(file_path, prompt)
        if not page_answers:
            return
        for page_no, answer in enumerate(page_answers, start=1):
            print(f"Page {page_no} analysis:")
            print(answer)
            print("---")
        return

    if suffix in image_suffixes:
        answer = analyze_image(file_path, prompt)
        print("Image analysis:")
        print(answer)
        return

    print("不支持的文件類型。請提供 PDF 或圖像文件。")

def main():
    """Command-line entry point.

    Expects the document path as the first command-line argument, asks for an
    analysis prompt on stdin, and runs the analysis. Without an argument it
    prints the usage line and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("使用方法: python app.py <文件路徑>")
        sys.exit(1)

    file_path = cli_args[0]
    prompt = input("請輸入您的分析提示: ")

    try:
        analyze_document(file_path, prompt)
    except FileNotFoundError:
        # Missing input file gets its own, more specific message.
        print(f"錯誤:找不到文件 '{file_path}'。請確保文件路徑正確。")
    except Exception as err:
        # Top-level boundary: report anything else instead of a traceback.
        print(f"發生錯誤: {err}")


if __name__ == "__main__":
    main()

Enter fullscreen mode Exit fullscreen mode

Top comments (0)