Elasticsearch Has a Free API: Here's How to Use It for Search Automation

#elasticsearch #search #api #database

Elasticsearch provides one of the most powerful free REST APIs in the developer ecosystem. You can index documents, run full-text searches, build aggregations, and power analytics dashboards — all over HTTP.

Why Use the Elasticsearch API?

Search millions of documents in milliseconds
Aggregate data for real-time analytics
Monitor cluster health and performance
Automate index management and data pipelines

Getting Started

# Check cluster health: print only the status, node count, and active shard count
curl -s "http://localhost:9200/_cluster/health" | jq '{status: .status, nodes: .number_of_nodes, shards: .active_shards}'

# List all indices as JSON, keeping each index's name, document count, and store size
curl -s "http://localhost:9200/_cat/indices?format=json" | jq '.[] | {index: .index, docs: ."docs.count", size: ."store.size"}'

Index and Search Documents

import requests

# Base URL that every request below is sent to; points at a node on localhost:9200.
ES_URL = "http://localhost:9200"

# Index a document
def index_doc(index, doc_id, document):
    """PUT ``document`` into ``index`` under ``doc_id``.

    Args:
        index: Name of the target index.
        doc_id: Identifier placed in the ``_doc/<id>`` URL segment.
        document: JSON-serializable body of the document.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    endpoint = f"{ES_URL}/{index}/_doc/{doc_id}"
    return requests.put(endpoint, json=document).json()

# Sample articles used to populate the "articles" index
articles = [
    {
        "title": "Getting Started with Python",
        "content": "Python is a versatile language...",
        "tags": ["python", "beginner"],
        "views": 1500,
    },
    {
        "title": "Advanced JavaScript Patterns",
        "content": "Design patterns help structure...",
        "tags": ["javascript", "advanced"],
        "views": 3200,
    },
    {
        "title": "Docker for Production",
        "content": "Running containers in production...",
        "tags": ["docker", "devops"],
        "views": 2800,
    },
]

# Document IDs are 1-based: the first article is stored under ID 1.
for doc_id, article in enumerate(articles, start=1):
    index_doc("articles", doc_id, article)
    print(f"Indexed: {article['title']}")

Full-Text Search

def search(index, query, size=10, timeout=10):
    """Run a fuzzy, multi-field full-text search and print the scored hits.

    The query matches against ``title`` (boosted x3), ``tags`` (boosted x2)
    and ``content``, with automatic fuzziness, and asks for highlighted
    fragments of ``title`` and ``content``.

    Args:
        index: Name of the index to search.
        query: Free-text search string.
        size: Maximum number of hits to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list of raw hit dicts from the response (``hits.hits``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    body = {
        "query": {
            "multi_match": {
                "query": query,
                "fields": ["title^3", "content", "tags^2"],
                "fuzziness": "AUTO"
            }
        },
        "size": size,
        "highlight": {
            "fields": {"title": {}, "content": {}}
        }
    }
    # Without a timeout, requests waits indefinitely on an unresponsive node.
    resp = requests.post(f"{ES_URL}/{index}/_search", json=body, timeout=timeout)
    # Error responses carry no "hits" key; surface the HTTP error instead of
    # failing later with an unrelated KeyError.
    resp.raise_for_status()
    hits = resp.json()["hits"]["hits"]

    for hit in hits:
        print(f"Score: {hit['_score']:.2f} | {hit['_source']['title']}")
        # "highlight" is only present on hits that produced fragments.
        for field, fragments in hit.get("highlight", {}).items():
            print(f"  {field}: {fragments[0]}")
    return hits

# Example: search the "articles" index for "python beginner" and print the hits
search("articles", "python beginner")

Aggregations for Analytics

def top_tags(index, size=10, timeout=10):
    """Print the most frequent tags and the average view count per tag.

    A single terms aggregation on ``tags.keyword`` carries an ``avg``
    sub-aggregation on ``views``, so both printed lists cover the same
    ``size`` tags.

    Args:
        index: Name of the index to aggregate over.
        size: Number of tag buckets to request.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    body = {
        # Only the aggregation results are needed, not the matching documents.
        "size": 0,
        "aggs": {
            "popular_tags": {
                "terms": {"field": "tags.keyword", "size": size},
                "aggs": {
                    "avg_views": {"avg": {"field": "views"}},
                },
            },
        },
    }
    # Without a timeout, requests waits indefinitely on an unresponsive node.
    resp = requests.post(f"{ES_URL}/{index}/_search", json=body, timeout=timeout)
    # Error responses carry no "aggregations" key; raise the HTTP error instead.
    resp.raise_for_status()
    buckets = resp.json()["aggregations"]["popular_tags"]["buckets"]

    print("Top Tags:")
    for bucket in buckets:
        print(f"  {bucket['key']:20s} {bucket['doc_count']} articles")

    print("\nAverage Views by Tag:")
    for bucket in buckets:
        avg = bucket["avg_views"]["value"]
        if avg is None:
            # The average is null when no document in the bucket has "views".
            print(f"  {bucket['key']:20s} n/a")
        else:
            print(f"  {bucket['key']:20s} {avg:.0f} views")

# Example: print tag statistics for the "articles" index using the default size
top_tags("articles")

Index Lifecycle Management

def create_ilm_policy(policy_name, hot_days=7, warm_days=30, delete_days=90):
    """PUT an index lifecycle policy with hot, warm and delete phases.

    Args:
        policy_name: Name used in the ``_ilm/policy/<name>`` URL.
        hot_days: Rollover ``max_age`` of the hot phase, in days.
        warm_days: ``min_age`` of the warm phase, in days.
        delete_days: ``min_age`` of the delete phase, in days.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    # Hot: roll over at 50GB or once the index is ``hot_days`` old.
    hot_phase = {
        "actions": {
            "rollover": {"max_size": "50GB", "max_age": f"{hot_days}d"},
        },
    }
    # Warm: shrink to one shard and force-merge down to one segment.
    warm_phase = {
        "min_age": f"{warm_days}d",
        "actions": {
            "shrink": {"number_of_shards": 1},
            "forcemerge": {"max_num_segments": 1},
        },
    }
    # Delete: remove the index once it reaches ``delete_days``.
    delete_phase = {
        "min_age": f"{delete_days}d",
        "actions": {"delete": {}},
    }
    payload = {
        "policy": {
            "phases": {
                "hot": hot_phase,
                "warm": warm_phase,
                "delete": delete_phase,
            },
        },
    }
    response = requests.put(f"{ES_URL}/_ilm/policy/{policy_name}", json=payload)
    return response.json()

# Example: register "logs-policy"; the values passed here equal the function's defaults
create_ilm_policy("logs-policy", hot_days=7, warm_days=30, delete_days=90)

Bulk Operations

def bulk_index(index, documents, timeout=30):
    """Index ``documents`` into ``index`` with a single ``_bulk`` request.

    Each document is sent as an ``index`` action whose ``_id`` is the
    document's position in ``documents`` (as a string, starting at "0").
    A summary line with the server-reported duration and error flag is
    printed afterwards.

    Args:
        index: Name of the target index.
        documents: Sequence of JSON-serializable dicts.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    import json  # local import keeps this snippet self-contained

    if not documents:
        # Nothing to send; skip the request instead of posting an empty body.
        return

    lines = []
    for i, doc in enumerate(documents):
        # json.dumps yields valid JSON for booleans, None, and strings that
        # contain quotes or apostrophes; str(doc) with quote replacement
        # does not.
        lines.append(json.dumps({"index": {"_index": index, "_id": str(i)}}))
        lines.append(json.dumps(doc))

    # The body is newline-delimited JSON and ends with a trailing newline.
    body = "\n".join(lines) + "\n"
    resp = requests.post(
        f"{ES_URL}/_bulk",
        data=body,
        headers={"Content-Type": "application/x-ndjson"},
        timeout=timeout,
    )
    # Error responses carry no "took"/"errors" keys; raise the HTTP error instead.
    resp.raise_for_status()
    result = resp.json()
    print(f"Indexed {len(documents)} docs in {result['took']}ms, errors: {result['errors']}")

Real-World Use Case

An e-commerce company migrated from SQL LIKE queries to Elasticsearch for their product search. Search response time dropped from 2 seconds to 15 milliseconds. They used aggregations to build real-time faceted navigation — filter by price, category, brand, and rating — all computed on the fly by the Elasticsearch API.