Now that my Azure OpenAI Service quota has been increased, I tried using GPT-4o to read PDF files and images, and it worked.
import base64
import io
import os
import sys
from openai import AzureOpenAI
from PIL import Image
from pdf2image import convert_from_path
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# any of the settings below are read.
load_dotenv()

# Directory handed to pdf2image's convert_from_path as ``poppler_path``.
POPPLER_PATH = r"E:\CustomPrograms\poppler-24.07.0\Library\bin"

# Azure OpenAI settings
client = AzureOpenAI(
    api_key=os.environ['AZURE_OPENAI_API_KEY'],
    api_version=os.getenv('AZURE_OPENAI_API_VERSION'),
    azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
)

# Deployment name, passed as ``model`` on every chat-completion request.
# Assigned on its own so that no unrelated module-level ``azure_endpoint``
# name ends up holding the deployment value.
development = os.getenv('AZURE_OPENAI_DEPLOYMENT')
def encode_image(image):
    """Return the base64 text encoding of an image.

    Args:
        image: Either a filesystem path (``str`` or ``os.PathLike``), whose
            raw bytes are encoded unchanged, or a ``PIL.Image.Image``, which
            is serialized to JPEG before encoding.

    Returns:
        The base64-encoded data as a ``str``.

    Raises:
        ValueError: If ``image`` is neither a path nor a PIL image.
    """
    if isinstance(image, (str, os.PathLike)):  # file path
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    if isinstance(image, Image.Image):  # PIL Image object
        # JPEG cannot store alpha channels or palettes; saving such modes
        # (RGBA, LA, P, ...) raises, so normalise them to RGB first. Modes the
        # JPEG writer already accepts are left untouched.
        if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            image = image.convert("RGB")
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format='JPEG')
        return base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')

    raise ValueError("Unsupported image type")
def analyze_image_content(base64_image, prompt, max_tokens=500):
    """Send one image and a text prompt to the configured chat deployment.

    Args:
        base64_image: Base64-encoded image data. It is embedded in a
            ``data:image/jpeg;base64,...`` URL, so it should be JPEG data.
        prompt: Text instruction sent alongside the image.
        max_tokens: Upper bound passed to the API for the length of the
            reply. Defaults to 500.

    Returns:
        The message content of the first choice in the response, or ``None``
        if the request raised an exception (the error is printed).
    """
    # Python ``str`` values are already Unicode, so no encode/decode round
    # trip is needed before handing the prompt to the client.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                },
            },
        ],
    }
    try:
        response = client.chat.completions.create(
            model=development,
            messages=[user_message],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best effort: report the failure and let the caller
        # carry on (e.g. with the remaining pages of a PDF).
        print(f"API 請求錯誤:{str(e)}")
        return None
def analyze_image(image_path, prompt):
    """Analyze a single image file with the given prompt.

    The request built by ``analyze_image_content`` always labels its payload
    as ``image/jpeg``. JPEG files are therefore sent byte-for-byte, while any
    other format is decoded with PIL and re-encoded as JPEG so the payload
    matches that label.

    Args:
        image_path: Path to the image file.
        prompt: Text instruction sent alongside the image.

    Returns:
        Whatever ``analyze_image_content`` returns for the encoded image.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    extension = os.path.splitext(image_path)[1].lower()
    if extension in ('.jpg', '.jpeg'):
        base64_image = encode_image(image_path)
    else:
        with Image.open(image_path) as img:
            # Convert to RGB here so the JPEG encoding succeeds even for
            # images with an alpha channel or a palette.
            base64_image = encode_image(img.convert("RGB"))
    return analyze_image_content(base64_image, prompt)
def analyze_pdf(pdf_path, prompt):
    """Render each page of a PDF to an image and analyze the pages in order.

    Args:
        pdf_path: Path to the PDF file.
        prompt: Base instruction; `` (Page N)`` is appended for each page.

    Returns:
        A list with the analysis text of every page that produced a truthy
        result, or ``None`` if the PDF could not be converted to images.
        Pages whose analysis came back empty or ``None`` are left out, so a
        list position does not necessarily equal the page number.
    """
    try:
        pages = convert_from_path(pdf_path, poppler_path=POPPLER_PATH)
    except Exception as exc:
        print(f"錯誤:無法處理 PDF 文件。錯誤信息:{exc!s}")
        print("請確保已正確安裝 Poppler 並設置了正確的路徑。")
        print(f"當前設置的 Poppler 路徑:{POPPLER_PATH}")
        return None

    answers = []
    for page_number, page_image in enumerate(pages, start=1):
        encoded_page = encode_image(page_image)
        page_prompt = f"{prompt} (Page {page_number})"
        answer = analyze_image_content(encoded_page, page_prompt)
        if not answer:
            # Skip pages for which no usable answer came back.
            continue
        answers.append(answer)
    return answers
def analyze_document(file_path, prompt):
    """Analyze a document (PDF or image), print the analysis and return it.

    The handler is chosen from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path to a ``.pdf``, ``.jpg``, ``.jpeg``, ``.png``,
            ``.bmp`` or ``.gif`` file.
        prompt: Text instruction for the analysis.

    Returns:
        For a PDF, the value returned by ``analyze_pdf``. For an image, the
        value returned by ``analyze_image``. ``None`` for an unsupported
        file type.
    """
    extension = os.path.splitext(file_path)[1].lower()

    if extension == '.pdf':
        results = analyze_pdf(file_path, prompt)
        if results:
            for i, result in enumerate(results):
                print(f"Page {i+1} analysis:")
                print(result)
                print("---")
        return results

    if extension in ('.jpg', '.jpeg', '.png', '.bmp', '.gif'):
        result = analyze_image(file_path, prompt)
        print("Image analysis:")
        print(result)
        return result

    print("不支持的文件類型。請提供 PDF 或圖像文件。")
    return None
def main():
    """Command-line entry point.

    Takes the document path from the first command-line argument, asks for
    the analysis prompt on standard input, and runs the analysis. With no
    argument it prints a usage line and exits with status 1. Errors raised
    by the analysis are reported on standard output rather than propagated.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("使用方法: python app.py <文件路徑>")
        sys.exit(1)

    target = cli_args[0]
    user_prompt = input("請輸入您的分析提示: ")
    try:
        analyze_document(target, user_prompt)
    except FileNotFoundError:
        print(f"錯誤:找不到文件 '{target}'。請確保文件路徑正確。")
    except Exception as err:
        print(f"發生錯誤: {err}")


if __name__ == "__main__":
    main()
Top comments (0)