DEV Community

TK Lin
TK Lin

Posted on

๐Ÿ’ฐ Claude API ใ‚ณใ‚นใƒˆๅ‰Šๆธ›่ก“๏ผšๅ…ฌๅผใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใงๆœ€ๅคง90%็ฏ€็ด„๏ผ

๐Ÿ’ฐ Claude API ใ‚ณใ‚นใƒˆๅ‰Šๆธ›่ก“๏ผšๅ…ฌๅผใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใงๆœ€ๅคง90%็ฏ€็ด„๏ผ

ใพใ ๅฎšไพกใงClaude APIใ‚’ไฝฟใฃใฆใ„ใพใ›ใ‚“ใ‹๏ผŸๅฎŸใฏๅ…ฌๅผใŒๆไพ›ใ™ใ‚‹3ใคใฎๆฉŸ่ƒฝใ‚’ไฝฟใ†ใ ใ‘ใงใ€APIใ‚ณใ‚นใƒˆใ‚’ๆœ€ๅคง90%ใพใงๅ‰Šๆธ›ใงใใ‚‹ใ‚“ใงใ™ใ€‚


๐ŸŽฏ ใฏใ˜ใ‚ใซ๏ผšใชใœใ‚ณใ‚นใƒˆๅ‰Šๆธ›ใŒ้‡่ฆใชใฎใ‹

็งใŸใกๅ’Œๅฟƒๆ‘ใงใฏใ€28ๅŒนใฎ็Œซใจ็ŠฌใŸใกใฎๅ†™็œŸใƒปๅ‹•็”ปใ‚’ๆฏŽๆ—ฅAIๅˆ†ๆžใ—ใฆใ„ใพใ™ใ€‚ๅ‹•็‰ฉ่ญ˜ๅˆฅใ€ใ‚ณใƒณใƒ†ใƒณใƒ„็”Ÿๆˆใ€่‡ชๅ‹•ๆŠ•็จฟ...Claude APIใฎๅ‘ผใณๅ‡บใ—ใฏ1ๆ—ฅๆ•ฐ็™พๅ›žใซใ‚‚ๅŠใณใพใ™ใ€‚

ๆœ€ๅˆใฎๆœˆใฎ่ซ‹ๆฑ‚ๆ›ธใ‚’่ฆ‹ใŸๆ™‚ใ€ๆญฃ็›ด้ฉšใใพใ—ใŸใ€‚ใ€Œใ“ใ‚Œใ€็ถšใ‘ใ‚‰ใ‚Œใ‚‹ใฎ...๏ผŸใ€

ใ—ใ‹ใ—ใ€Anthropicๅ…ฌๅผใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆใ‚’่ชญใฟ่พผใ‚“ใ ็ตๆžœใ€3ใคใฎ็ฏ€็ด„ใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใ‚’็™บ่ฆ‹ใ€‚ไปŠใงใฏๅŒใ˜ๅ‡ฆ็†้‡ใงใ‚ณใ‚นใƒˆใ‚’80%ไปฅไธŠๅ‰Šๆธ›ใงใใฆใ„ใพใ™ใ€‚

ไปŠๆ—ฅใฏใใฎ็ง˜ๅฏ†ใ‚’ๅ…จใฆๅ…ฌ้–‹ใ—ใพใ™ใ€‚


๐Ÿ“Š ไธ‰ๅคง็ฏ€็ด„ใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏ

1๏ธโƒฃ Batch API๏ผˆ50%ใ‚ชใƒ•๏ผ‰

ๅณๆ™‚ใƒฌใ‚นใƒใƒณใ‚นใŒไธ่ฆใชๅ‡ฆ็†ใซๆœ€้ฉ๏ผ

Batch APIใฏใ€ใƒชใ‚ฏใ‚จใ‚นใƒˆใ‚’24ๆ™‚้–“ไปฅๅ†…ใซๅ‡ฆ็†ใ™ใ‚‹ไปฃใ‚ใ‚Šใซใ€50%ๅ‰ฒๅผ•ใ‚’ๆไพ›ใ—ใพใ™ใ€‚

้ฉ็”จใ‚ทใƒผใƒณ

  • ๅคง้‡ใฎ็”ปๅƒๅˆ†ๆž
  • ใƒใƒƒใƒ็ฟป่จณๅ‡ฆ็†
  • ๅคœ้–“ใฎๅฎšๆœŸๅ‡ฆ็†
  • ใƒฌใƒใƒผใƒˆ็”Ÿๆˆ

Python ใ‚ณใƒผใƒ‰ไพ‹

import anthropic
import json  # NOTE(review): unused in this snippet

# Module-level client shared by the helpers below.
client = anthropic.Anthropic()

# ใƒใƒƒใƒใƒชใ‚ฏใ‚จใ‚นใƒˆใ‚’ไฝœๆˆ
def create_batch_request(prompts: list[str]) -> str:
    """่ค‡ๆ•ฐใฎใƒ—ใƒญใƒณใƒ—ใƒˆใ‚’ใƒใƒƒใƒๅ‡ฆ็†ใ™ใ‚‹"""

    requests = []
    for i, prompt in enumerate(prompts):
        requests.append({
            "custom_id": f"request-{i}",
            "params": {
                "model": "claude-sonnet-4-20250514",
                "max_tokens": 1024,
                "messages": [
                    {"role": "user", "content": prompt}
                ]
            }
        })

    # ใƒใƒƒใƒใ‚’ไฝœๆˆ
    batch = client.batches.create(requests=requests)

    print(f"โœ… ใƒใƒƒใƒไฝœๆˆๅฎŒไบ†: {batch.id}")
    print(f"๐Ÿ“Š ใƒชใ‚ฏใ‚จใ‚นใƒˆๆ•ฐ: {len(prompts)}")
    print(f"๐Ÿ’ฐ ็ฏ€็ด„็އ: 50%!")

    return batch.id

# ไฝฟ็”จไพ‹
prompts = [
    "ใ“ใฎ็Œซใฎ็‰นๅพดใ‚’่ชฌๆ˜Žใ—ใฆใใ ใ•ใ„",
    "ใ“ใฎ็Šฌใฎๅ“็จฎใ‚’ๅˆคๅฎšใ—ใฆใใ ใ•ใ„",
    "ใ“ใฎๅ‹•็‰ฉใฎ่กŒๅ‹•ใ‚’ๅˆ†ๆžใ—ใฆใใ ใ•ใ„"
]

batch_id = create_batch_request(prompts)
Enter fullscreen mode Exit fullscreen mode

๐Ÿ’ก ใƒใ‚คใƒณใƒˆ

  • ็ตๆžœใฏ24ๆ™‚้–“ไปฅๅ†…ใซ่ฟ”ๅด
  • ๅคง้‡ๅ‡ฆ็†ใปใฉๅŠนๆžœ็š„
  • client.messages.batches.retrieve(batch_id) で状態確認

2๏ธโƒฃ Prompt Caching๏ผˆๆœ€ๅคง90%ใ‚ชใƒ•๏ผ‰

็นฐใ‚Š่ฟ”ใ—ไฝฟใ†ใƒ—ใƒญใƒณใƒ—ใƒˆใ‚’ใ‚ญใƒฃใƒƒใ‚ทใƒฅ๏ผ

ๅŒใ˜ใ‚ทใ‚นใƒ†ใƒ ใƒ—ใƒญใƒณใƒ—ใƒˆใ‚„้•ทใ„ใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใ‚’ไฝ•ๅบฆใ‚‚้€ไฟกใ—ใฆใ„ใพใ›ใ‚“ใ‹๏ผŸPrompt Cachingใ‚’ไฝฟใˆใฐใ€ใ‚ญใƒฃใƒƒใ‚ทใƒฅใ•ใ‚ŒใŸ้ƒจๅˆ†ใฏ90%ใ‚ชใƒ•ใซใชใ‚Šใพใ™ใ€‚

้ฉ็”จใ‚ทใƒผใƒณ

  • ้•ทใ„ใ‚ทใ‚นใƒ†ใƒ ใƒ—ใƒญใƒณใƒ—ใƒˆ
  • ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆๅˆ†ๆž๏ผˆๅŒใ˜ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆใซ่ค‡ๆ•ฐ่ณชๅ•๏ผ‰
  • Few-shotไพ‹ใฎๅ†ๅˆฉ็”จ
  • RAGใฎใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆ

Python ใ‚ณใƒผใƒ‰ไพ‹

import anthropic

# Module-level client shared by the helpers below.
client = anthropic.Anthropic()

def analyze_with_cache(document: str, questions: list[str]):
    """Answer several questions about one document, reusing a prompt cache.

    The document is sent as a system block tagged with ``cache_control``, so
    every call after the first reads it from cache instead of paying full
    input-token price.
    """
    # The system blocks never change between calls, so build them once.
    system_blocks = [
        {
            "type": "text",
            "text": "あなたは動物行動分析の専門家です。",
        },
        {
            "type": "text",
            "text": document,
            "cache_control": {"type": "ephemeral"},  # 🔑 mark for caching
        },
    ]

    answers = []
    for i, question in enumerate(questions):
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            system=system_blocks,
            messages=[{"role": "user", "content": question}],
        )

        # Report whether this call hit the cache or had to create it.
        usage = response.usage
        cache_read = getattr(usage, 'cache_read_input_tokens', 0)
        cache_creation = getattr(usage, 'cache_creation_input_tokens', 0)

        if cache_read > 0:
            print(f"✅ 質問{i+1}: キャッシュヒット! {cache_read}トークン (90%オフ)")
        elif cache_creation > 0:
            print(f"📝 質問{i+1}: キャッシュ作成 {cache_creation}トークン")

        answers.append(response.content[0].text)

    return answers

# ไฝฟ็”จไพ‹๏ผš้•ทใ„ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆใซ่ค‡ๆ•ฐใฎ่ณชๅ•
document = """
[ๅ’Œๅฟƒๆ‘ใฎๅ‹•็‰ฉใƒ—ใƒญใƒ•ใ‚ฃใƒผใƒซ - 10,000ๆ–‡ๅญ—ใฎใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆ...]
"""

questions = [
    "Jellyใฎๆ€งๆ ผใ‚’ๆ•™ใˆใฆใใ ใ•ใ„",
    "Goldใฎๅฅฝใใช้ฃŸใน็‰ฉใฏ๏ผŸ",
    "Arielใฎ็‰นๅพด็š„ใช่กŒๅ‹•ใƒ‘ใ‚ฟใƒผใƒณใฏ๏ผŸ"
]

results = analyze_with_cache(document, questions)
# โ†’ 2ๅ›ž็›ฎไปฅ้™ใฎ่ณชๅ•ใงใ‚ญใƒฃใƒƒใ‚ทใƒฅใƒ’ใƒƒใƒˆใ€90%็ฏ€็ด„๏ผ
Enter fullscreen mode Exit fullscreen mode

๐Ÿ’ก ใƒใ‚คใƒณใƒˆ

  • ใ‚ญใƒฃใƒƒใ‚ทใƒฅใฏ5ๅˆ†้–“ๆœ‰ๅŠน
  • 1024ใƒˆใƒผใ‚ฏใƒณไปฅไธŠใงใ‚ญใƒฃใƒƒใ‚ทใƒฅๅฏ่ƒฝ
  • cache_control: {"type": "ephemeral"} ใ‚’ไป˜ใ‘ใ‚‹ใ ใ‘

3๏ธโƒฃ Extended Thinking๏ผˆๆ€่€ƒใƒˆใƒผใ‚ฏใƒณ็ด„80%ใ‚ชใƒ•๏ผ‰

่ค‡้›‘ใชๆŽจ่ซ–ใ‚ฟใ‚นใ‚ฏใซๆœ€้ฉ๏ผ

Extended Thinkingใฏใ€Claudeใซใ€Œ่€ƒใˆใ‚‹ๆ™‚้–“ใ€ใ‚’ไธŽใˆใ‚‹ๆฉŸ่ƒฝใ€‚ๆ€่€ƒใƒˆใƒผใ‚ฏใƒณใฏ้€šๅธธใฎ็ด„80%ใ‚ชใƒ•ใฎ็‰นๅˆฅไพกๆ ผใงใ™ใ€‚

้ฉ็”จใ‚ทใƒผใƒณ

  • ่ค‡้›‘ใช่ซ–็†ๆŽจ่ซ–
  • ใ‚ณใƒผใƒ‰็”Ÿๆˆใƒปใƒ‡ใƒใƒƒใ‚ฐ
  • ๆ•ฐๅญฆ็š„ๅ•้กŒ่งฃๆฑบ
  • ๆˆฆ็•ฅ็ซ‹ๆกˆ

Python ใ‚ณใƒผใƒ‰ไพ‹

import anthropic

# Module-level client shared by the helpers below.
client = anthropic.Anthropic()

def solve_complex_problem(problem: str):
    """Run a prompt with Extended Thinking enabled and split the response.

    Returns:
        dict with "thinking" (the model's reasoning trace, or None) and
        "answer" (the final text response, or None).
    """
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=16000,
        thinking={
            "type": "enabled",
            "budget_tokens": 10000  # tokens reserved for the thinking phase
        },
        messages=[
            {"role": "user", "content": problem}
        ]
    )

    # Take the last "thinking" block and the last "text" block; scanning
    # from the end preserves the original last-block-wins behaviour.
    thinking_content = next(
        (b.thinking for b in reversed(response.content) if b.type == "thinking"),
        None
    )
    answer_content = next(
        (b.text for b in reversed(response.content) if b.type == "text"),
        None
    )

    # Cost accounting
    usage = response.usage
    input_tokens = usage.input_tokens
    output_tokens = usage.output_tokens

    print(f"📊 入力トークン: {input_tokens}")
    print(f"📊 出力トークン: {output_tokens}")
    print(f"💭 思考トークンは約80%オフ!")

    return {
        "thinking": thinking_content,
        "answer": answer_content
    }

# ไฝฟ็”จไพ‹
problem = """
ๅ’Œๅฟƒๆ‘ใฎ28ๅŒนใฎๅ‹•็‰ฉใŸใกใฎๆœ€้ฉใช็ตฆ้คŒใ‚นใ‚ฑใ‚ธใƒฅใƒผใƒซใ‚’่จญ่จˆใ—ใฆใใ ใ•ใ„ใ€‚
ๆกไปถ๏ผš
- ็Œซ23ๅŒนใ€็Šฌ5ๅŒน
- ๆœใƒปๅค•ใฎ2ๅ›ž็ตฆ้คŒ
- ็‰นๅˆฅ้ฃŸใŒๅฟ…่ฆใชๅ‹•็‰ฉใŒ3ๅŒน
- ใ‚นใ‚ฟใƒƒใƒ•ใฏ2ๅ
"""

result = solve_complex_problem(problem)
print(f"\n๐ŸŽฏ ๅ›ž็ญ”:\n{result['answer']}")
Enter fullscreen mode Exit fullscreen mode

๐Ÿ’ก ใƒใ‚คใƒณใƒˆ

  • ๆ€่€ƒใƒˆใƒผใ‚ฏใƒณใฏๅ‡บๅŠ›ใซๅซใพใ‚Œใชใ„
  • budget_tokens ใงๆ€่€ƒ้‡ใ‚’ๅˆถๅพก
  • ่ค‡้›‘ใชๅ•้กŒใปใฉๅŠนๆžœ็š„

๐Ÿ“ˆ ็ฏ€็ด„ใƒฌใƒใƒผใƒˆ๏ผšๅฎŸ้š›ใฎๅŠนๆžœ

ๅ’Œๅฟƒๆ‘ใงใฎ1ใƒถๆœˆ้–“ใฎๅฎŸ็ธพใ‚’ๅ…ฌ้–‹ใ—ใพใ™๏ผš

โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
โ•‘           ๐Ÿ’ฐ Claude API ๆœˆ้–“็ฏ€็ด„ใƒฌใƒใƒผใƒˆ                      โ•‘
โ• โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•ฃ
โ•‘                                                              โ•‘
โ•‘  ๐Ÿ“Š ๅ‡ฆ็†ๅ†…่จณ                                                  โ•‘
โ•‘  โ”œโ”€ ๅ‹•็‰ฉ่ญ˜ๅˆฅ (Batch)     : 3,000ๅ›ž/ๆœˆ โ†’ 50%ใ‚ชใƒ•             โ•‘
โ•‘  โ”œโ”€ ใ‚ณใƒณใƒ†ใƒณใƒ„็”Ÿๆˆ (Cache): 1,500ๅ›ž/ๆœˆ โ†’ 90%ใ‚ชใƒ•             โ•‘
โ•‘  โ””โ”€ ๆˆฆ็•ฅ็ซ‹ๆกˆ (Thinking)  :   100ๅ›ž/ๆœˆ โ†’ 80%ใ‚ชใƒ•             โ•‘
โ•‘                                                              โ•‘
โ•‘  ๐Ÿ’ต ใ‚ณใ‚นใƒˆๆฏ”่ผƒ                                                โ•‘
โ•‘  โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”              โ•‘
โ•‘  โ”‚ ๅฎšไพก       : $450.00  โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ โ”‚              โ•‘
โ•‘  โ”‚ ็ฏ€็ด„ๅพŒ     : $89.50   โ–ˆโ–ˆโ–ˆโ–ˆ                 โ”‚              โ•‘
โ•‘  โ”‚ ็ฏ€็ด„้ก     : $360.50  (80.1%ๅ‰Šๆธ›!)         โ”‚              โ•‘
โ•‘  โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜              โ•‘
โ•‘                                                              โ•‘
โ•‘  ๐ŸŽฏ ใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏๅˆฅๅŠนๆžœ                                          โ•‘
โ•‘  โ”œโ”€ Batch API      : -$75.00  (50%ๅ‰Šๆธ›)                     โ•‘
โ•‘  โ”œโ”€ Prompt Caching : -$243.00 (90%ๅ‰Šๆธ›)                     โ•‘
โ•‘  โ””โ”€ Ext. Thinking  : -$42.50  (80%ๅ‰Šๆธ›)                     โ•‘
โ•‘                                                              โ•‘
โ•‘  โœ… ๅนด้–“ๆ›็ฎ—็ฏ€็ด„้ก: $4,326.00                                 โ•‘
โ•‘                                                              โ•‘
โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
Enter fullscreen mode Exit fullscreen mode

๐Ÿ› ๏ธ ใ™ใใซไฝฟใˆใ‚‹็ตฑๅˆใ‚ฏใƒฉใ‚น

3ใคใฎใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใ‚’็ตฑๅˆใ—ใŸไพฟๅˆฉใชใ‚ฏใƒฉใ‚นใ‚’ไฝœใ‚Šใพใ—ใŸ๏ผš

import anthropic
from dataclasses import dataclass
from typing import Optional
from enum import Enum

class OptimizationMode(Enum):
    """Selects which cost-saving technique to apply to a request."""
    BATCH = "batch"           # 50% off, processed within 24h
    CACHED = "cached"         # 90% off, for repeated context
    THINKING = "thinking"     # 80% off, for complex reasoning

@dataclass
class CostOptimizedRequest:
    """A single API request plus the optimization strategy to run it with."""
    # Which technique (BATCH / CACHED / THINKING) handles this request.
    mode: OptimizationMode
    # The user prompt sent to the model.
    prompt: str
    # Optional plain system prompt (not cached).
    system_prompt: Optional[str] = None
    # Large reusable context to tag with cache_control (CACHED mode only).
    cache_context: Optional[str] = None
    # Token budget for extended thinking (THINKING mode only).
    thinking_budget: int = 10000

class ClaudeCostOptimizer:
    """Routes requests to the matching Claude API cost-saving technique.

    Supported techniques:
      * Message Batches (async, ~50% off, results within 24h)
      * Prompt caching  (~90% off on cached input tokens)
      * Extended thinking (dedicated reasoning budget)
    """

    def __init__(self):
        self.client = anthropic.Anthropic()
        # Running totals; "estimated_savings" is a rough approximation only.
        self.stats = {
            "total_requests": 0,
            "estimated_savings": 0.0
        }

    def process(self, request: CostOptimizedRequest):
        """Execute *request* with the handler matching its optimization mode.

        Raises:
            ValueError: if the request carries an unrecognized mode.
        """
        self.stats["total_requests"] += 1

        if request.mode == OptimizationMode.BATCH:
            return self._process_batch(request)
        elif request.mode == OptimizationMode.CACHED:
            return self._process_cached(request)
        elif request.mode == OptimizationMode.THINKING:
            return self._process_thinking(request)
        # FIX: previously fell through and returned None silently.
        raise ValueError(f"Unknown optimization mode: {request.mode}")

    def _process_batch(self, request):
        """Submit via the Message Batches API (50% off)."""
        # FIX: the batches endpoint lives under `messages` in the Python SDK;
        # `self.client.batches.create` does not exist.
        batch = self.client.messages.batches.create(
            requests=[{
                "custom_id": "opt-request",
                "params": {
                    "model": "claude-sonnet-4-20250514",
                    "max_tokens": 1024,
                    "messages": [{"role": "user", "content": request.prompt}]
                }
            }]
        )
        self.stats["estimated_savings"] += 0.50  # rough estimate
        return {"batch_id": batch.id, "mode": "batch", "savings": "50%"}

    def _process_cached(self, request):
        """Call with prompt caching enabled on the reusable context (90% off)."""
        system = []
        if request.system_prompt:
            system.append({"type": "text", "text": request.system_prompt})
        if request.cache_context:
            system.append({
                "type": "text",
                "text": request.cache_context,
                "cache_control": {"type": "ephemeral"}
            })

        # FIX: only pass `system` when there are blocks — the SDK does not
        # accept an explicit None for this parameter.
        extra = {"system": system} if system else {}
        response = self.client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            messages=[{"role": "user", "content": request.prompt}],
            **extra
        )

        cache_read = getattr(response.usage, 'cache_read_input_tokens', 0)
        if cache_read > 0:
            self.stats["estimated_savings"] += 0.90

        return {"response": response.content[0].text, "mode": "cached", "savings": "90%"}

    def _process_thinking(self, request):
        """Call with extended thinking enabled."""
        response = self.client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=16000,
            thinking={"type": "enabled", "budget_tokens": request.thinking_budget},
            messages=[{"role": "user", "content": request.prompt}]
        )

        self.stats["estimated_savings"] += 0.80

        # The first "text" block is the final answer; thinking blocks skipped.
        answer = next(
            (b.text for b in response.content if b.type == "text"),
            None
        )
        return {"response": answer, "mode": "thinking", "savings": "80%"}

    def get_stats(self):
        """Return the running request/savings statistics."""
        return self.stats

# ไฝฟ็”จไพ‹
optimizer = ClaudeCostOptimizer()

# Batchๅ‡ฆ็†๏ผˆๅณๆ™‚ๆ€งไธ่ฆใชๅคง้‡ๅ‡ฆ็†๏ผ‰
result1 = optimizer.process(CostOptimizedRequest(
    mode=OptimizationMode.BATCH,
    prompt="ใ“ใฎ็”ปๅƒใฎๅ‹•็‰ฉใ‚’่ญ˜ๅˆฅใ—ใฆใใ ใ•ใ„"
))

# Cacheๅ‡ฆ็†๏ผˆๅŒใ˜ใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใง่ค‡ๆ•ฐ่ณชๅ•๏ผ‰
result2 = optimizer.process(CostOptimizedRequest(
    mode=OptimizationMode.CACHED,
    prompt="Jellyใฎ็‰นๅพดใฏ๏ผŸ",
    cache_context="[ๅ’Œๅฟƒๆ‘ใฎๅ‹•็‰ฉใƒ‡ใƒผใ‚ฟใƒ™ใƒผใ‚น...]"
))

# Thinkingๅ‡ฆ็†๏ผˆ่ค‡้›‘ใชๆŽจ่ซ–๏ผ‰
result3 = optimizer.process(CostOptimizedRequest(
    mode=OptimizationMode.THINKING,
    prompt="28ๅŒนใฎๅ‹•็‰ฉใฎๆœ€้ฉใชๅฅๅบท็ฎก็†ใƒ—ใƒฉใƒณใ‚’่จญ่จˆใ—ใฆใใ ใ•ใ„",
    thinking_budget=15000
))

print(f"๐Ÿ“Š ็ตฑ่จˆ: {optimizer.get_stats()}")
Enter fullscreen mode Exit fullscreen mode

๐ŸŽฏ ใฉใฎใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใ‚’ไฝฟใ†ในใ๏ผŸ

ๅˆคๆ–ญใƒ•ใƒญใƒผใƒใƒฃใƒผใƒˆ๏ผš

ๅ‡ฆ็†ใฎๅณๆ™‚ๆ€งใŒๅฟ…่ฆ๏ผŸ
โ”œโ”€ ใ„ใ„ใˆ โ†’ Batch API (50%ใ‚ชใƒ•) โœ…
โ””โ”€ ใฏใ„
    โ”œโ”€ ๅŒใ˜ใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใ‚’็นฐใ‚Š่ฟ”ใ—ไฝฟใ†๏ผŸ
    โ”‚   โ””โ”€ ใฏใ„ โ†’ Prompt Caching (90%ใ‚ชใƒ•) โœ…
    โ””โ”€ ่ค‡้›‘ใชๆŽจ่ซ–ใŒๅฟ…่ฆ๏ผŸ
        โ”œโ”€ ใฏใ„ โ†’ Extended Thinking (80%ใ‚ชใƒ•) โœ…
        โ””โ”€ ใ„ใ„ใˆ โ†’ ้€šๅธธAPI
Enter fullscreen mode Exit fullscreen mode
ใ‚ทใƒผใƒณ ๆŽจๅฅจใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏ ็ฏ€็ด„็އ
ๅคœ้–“ใƒใƒƒใƒๅ‡ฆ็† Batch API 50%
ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆๅˆ†ๆž Prompt Caching 90%
ใ‚ณใƒผใƒ‰็”Ÿๆˆ Extended Thinking 80%
ใƒใƒฃใƒƒใƒˆใƒœใƒƒใƒˆ Prompt Caching 90%
็”ปๅƒๅคง้‡ๅˆ†ๆž Batch API 50%
ๆˆฆ็•ฅ็ซ‹ๆกˆ Extended Thinking 80%

๐Ÿ“š ๅ‚่€ƒใƒชใƒณใ‚ฏ


๐Ÿพ ใŠใ‚ใ‚Šใซ

ๅ’Œๅฟƒๆ‘ใงใฏใ€ใ“ใ‚Œใ‚‰ใฎใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใ‚’้ง†ไฝฟใ—ใฆใ€28ๅŒนใฎ็Œซใจ็ŠฌใŸใกใฎAIๅˆ†ๆžใ‚’ๆŒ็ถšๅฏ่ƒฝใชๅฝขใง้‹ๅ–ถใ—ใฆใ„ใพใ™ใ€‚

ใ€ŒAIใฎๅŠ›ใงใ€ๅ‹•็‰ฉใŸใกใ‚’ใ‚‚ใฃใจๅนธใ›ใซใ€

ใ‚ณใ‚นใƒˆๅ‰Šๆธ›ใฏใ€ใใฎๅคขใ‚’ๅฎŸ็พใ™ใ‚‹ใŸใ‚ใฎๅคงๅˆ‡ใชไธ€ๆญฉใงใ™ใ€‚

็š†ใ•ใ‚“ใ‚‚ใœใฒใ€ใ“ใ‚Œใ‚‰ใฎใƒ†ใ‚ฏใƒ‹ใƒƒใ‚ฏใ‚’่ฉฆใ—ใฆใฟใฆใใ ใ•ใ„ใ€‚่ณชๅ•ใŒใ‚ใ‚Œใฐใ€ใ‚ณใƒกใƒณใƒˆๆฌ„ใงใŠๆฐ—่ปฝใซใฉใ†ใž๏ผ


๐Ÿพ by ๅ’Œๅฟƒๆ‘ washinmura.jp

ๅ’Œ็‰ ไธ€่ตท๏ผŒ็™‚็™’ๅ…จไธ–็•Œ
ใƒšใƒƒใƒˆใจไธ€็ท’ใซใ€ไธ–็•Œใ‚’็™’ใใ†


Claude #Anthropic #API็ฏ€็ด„ #AI้–‹็™บ #LLM #ๆฉŸๆขฐๅญฆ็ฟ’ #Python #้–‹็™บ่€… #ใƒ—ใƒญใ‚ฐใƒฉใƒŸใƒณใ‚ฐ #ๆŠ€่ก“ๅ…ฑๆœ‰ #ใ‚ณใ‚นใƒˆๅ‰Šๆธ› #ClaudeAPI #AIใƒ„ใƒผใƒซ #ๅŠน็އๅŒ– #ๆŠ€่ก“tips #AnthropicAPI #AIใ‚ณใ‚นใƒˆ #้–‹็™บๅŠน็އ #ใƒ†ใƒƒใ‚ฏ #ใ‚จใƒณใ‚ธใƒ‹ใ‚ข

Top comments (0)