Serverless Framework is the original infrastructure-as-code tool for serverless. Define your Lambda functions, API Gateway, DynamoDB, and S3 in one YAML file.
serverless.yml: Your Entire Backend
# serverless.yml — the entire backend in one file: three Lambda functions,
# their HTTP + scheduled triggers, a DynamoDB table, and an S3 bucket.
service: scraping-api
frameworkVersion: '4'

provider:
  name: aws
  runtime: nodejs20.x
  region: us-east-1
  # Shared environment variables, injected into every function.
  environment:
    TABLE_NAME: ${self:service}-products
    BUCKET_NAME: ${self:service}-data

functions:
  scrape:
    handler: src/handlers/scrape.handler
    # NOTE(review): API Gateway REST (`http`) responses are capped at 29s —
    # a 300s scrape invoked via POST /scrape will time out at the gateway
    # even though the function keeps running. Fine for the scheduled path.
    timeout: 300
    memorySize: 512
    events:
      - http:
          path: /scrape
          method: post
          cors: true
      - schedule:
          rate: rate(1 hour)
          # Static payload passed as the event for scheduled invocations.
          input:
            source: scheduled
  getProducts:
    handler: src/handlers/products.list
    events:
      - http:
          path: /products
          method: get
          cors: true
  getProduct:
    handler: src/handlers/products.get
    events:
      - http:
          path: /products/{id}
          method: get
          cors: true

# Raw CloudFormation for non-function infrastructure.
resources:
  Resources:
    ProductsTable:
      Type: AWS::DynamoDB::Table
      Properties:
        TableName: ${self:provider.environment.TABLE_NAME}
        BillingMode: PAY_PER_REQUEST
        AttributeDefinitions:
          - AttributeName: id
            AttributeType: S
          - AttributeName: category
            AttributeType: S
        KeySchema:
          - AttributeName: id
            KeyType: HASH
        GlobalSecondaryIndexes:
          - IndexName: category-index
            KeySchema:
              - AttributeName: category
                KeyType: HASH
            Projection:
              ProjectionType: ALL
    DataBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: ${self:provider.environment.BUCKET_NAME}
Handler Functions
// src/handlers/scrape.ts
import { APIGatewayProxyHandler } from "aws-lambda";
export const handler: APIGatewayProxyHandler = async (event) => {
const { url } = JSON.parse(event.body || "{}");
const html = await fetch(url).then(r => r.text());
const products = parseProducts(html);
// Save to DynamoDB
for (const product of products) {
await dynamodb.put({ TableName: TABLE_NAME, Item: product }).promise();
}
return {
statusCode: 200,
headers: { "Access-Control-Allow-Origin": "*" },
body: JSON.stringify({ scraped: products.length }),
};
};
CLI Commands
# Deploy the whole stack (CloudFormation)
serverless deploy

# Deploy a single function (fast — skips CloudFormation)
serverless deploy function -f scrape

# Invoke locally with a mock API Gateway event.
# Note: `body` must be a JSON *string*, hence the escaped quotes.
# (Fixed: the original had a stray trailing `}` that made the JSON invalid.)
serverless invoke local -f scrape --data '{"body":"{\"url\":\"https://example.com\"}"}'

# View logs (tail)
serverless logs -f scrape -t

# Remove everything the stack created
serverless remove

# Dev mode (hot reload)
serverless dev
Stages and Variables
provider:
  # Taken from the --stage CLI flag, defaulting to 'dev'.
  stage: ${opt:stage, 'dev'}
  environment:
    # Stage-qualified table name, e.g. scraping-api-prod-products.
    TABLE_NAME: ${self:service}-${self:provider.stage}-products

custom:
  # Per-stage settings. These are not applied automatically — reference
  # them where needed, e.g.:
  #   memorySize: ${self:custom.stages.${self:provider.stage}.memorySize}
  stages:
    dev:
      memorySize: 256
    prod:
      memorySize: 1024
# Deploy to the prod stage (picked up by ${opt:stage, 'dev'})
serverless deploy --stage prod
Deploy serverless scraping APIs? My Apify tools + Serverless = auto-scaling data pipelines.
Custom serverless solution? Email spinov001@gmail.com
Top comments (0)