#!/usr/bin/env bash
set -euo pipefail
# ==========================================================
# WHAT TO CHANGE (or pass via flags when running)
# ----------------------------------------------------------
# NAMESPACE : Kubernetes namespace where the CronJob lives.
# CRONJOB_NAME : The CronJob you want to trigger (manual run).
# JOB_PREFIX : Optional prefix for the created Job name. If empty, uses CRONJOB_NAME.
# SAMPLE_INTERVAL: How often (seconds) to record CPU/Memory metrics.
# WAIT_POD_TIMEOUT: How long (seconds) to wait for the Job's Pod to appear.
# CONTAINER_NAME : Optional; container to log if the Pod has multiple containers.
#
# You can edit the defaults below OR pass flags:
# -n/--namespace <ns> -c/--cronjob <name> -j/--job-prefix <prefix>
# --interval <secs> --wait <secs> --container <name>
# --tue-sat-only (skip Sun/Mon)
#
# Output goes into: <script_dir>/output/<YYYYmmdd-HHMMSS>_<jobPrefix>/
# Files produced: pod.log and resources.csv
# ==========================================================
# ---- Defaults (replace with your values, or override with flags) ----
# The two placeholder names below are refused by the validation step, so a
# real namespace and CronJob must come from an edit here or from the flags.
NAMESPACE='your-namespace'        # e.g., "prod", "staging", "data"
CRONJOB_NAME='your-cronjob-name'  # e.g., "daily-import", "billing-job"
JOB_PREFIX=''                     # empty means: fall back to CRONJOB_NAME
CONTAINER_NAME=''                 # optional: pick container; default=first container
TUE_SAT_ONLY='false'              # switched to "true" by --tue-sat-only
SAMPLE_INTERVAL=10                # seconds between metric samples
WAIT_POD_TIMEOUT=900              # seconds to wait for pod creation
# Honour a KUBECTL value from the environment; otherwise rely on PATH lookup.
: "${KUBECTL:=kubectl}"
# ---- Resolve script location & set output root next to script ----
script_path="${BASH_SOURCE[0]}"
SCRIPT_DIR="$(cd "$(dirname "$script_path")" && pwd)"
OUTPUT_ROOT="${SCRIPT_DIR}/output" # Output folder created alongside this script
#######################################
# Print the help text and terminate the script.
# Globals:   NAMESPACE, CRONJOB_NAME, SAMPLE_INTERVAL, WAIT_POD_TIMEOUT (read,
#            shown as the current defaults)
# Arguments: $1 - optional exit status (default: 1, i.e. "usage error").
#                 Anything that is not a non-negative integer falls back to 1.
# Outputs:   help text on stdout when the status is 0 (help was requested),
#            otherwise on stderr so it does not pollute captured output.
# Returns:   never returns; exits with the chosen status.
#######################################
usage() {
  local status="${1:-1}"
  local fd=2
  [[ "$status" =~ ^[0-9]+$ ]] || status=1
  if [[ "$status" == "0" ]]; then
    fd=1
  fi
  cat >&"$fd" <<EOF
Usage: ${0##*/} [-n NS] [-c CRON] [-j JOBPREFIX] [--container NAME]
[--interval SECS] [--wait SECS] [--tue-sat-only]
Examples:
${0##*/} -n data -c daily-import --interval 5
${0##*/} -n prod -c billing-job -j billing --container worker
Flags:
-n, --namespace Kubernetes namespace (default: "$NAMESPACE")
-c, --cronjob CronJob name (default: "$CRONJOB_NAME")
-j, --job-prefix Job name prefix (default: CRONJOB_NAME)
--container Specific container to log (default: first container)
--interval Metrics sample interval seconds (default: $SAMPLE_INTERVAL)
--wait Timeout waiting for pod seconds (default: $WAIT_POD_TIMEOUT)
--tue-sat-only Only run Tue–Sat (skip Sun/Mon)
-h, --help
EOF
  exit "$status"
}
# ts: print the current local time as "YYYY-MM-DD HH:MM:SS" (log-line prefix).
ts() {
  date '+%F %T'
}
# tsz: print the current UTC time as "YYYY-MM-DDTHH:MM:SSZ" (CSV timestamp).
tsz() {
  date -u '+%FT%TZ'
}
# ---- Parse command-line flags ----
#######################################
# Apply command-line flags to the configuration globals.
# Globals:   NAMESPACE, CRONJOB_NAME, JOB_PREFIX, CONTAINER_NAME,
#            SAMPLE_INTERVAL, WAIT_POD_TIMEOUT, TUE_SAT_ONLY (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   0 when every argument was consumed; exits 2 when a flag that
#            takes a value is given without one; hands over to usage (which
#            exits) for -h/--help and for unknown arguments.
#######################################
parse_args() {
  while (( $# > 0 )); do
    # Under `set -u` a trailing value-taking flag would otherwise abort with
    # an "unbound variable" error on $2, so check for the value up front.
    case "$1" in
      -n|--namespace|-c|--cronjob|-j|--job-prefix|--container|--interval|--wait)
        if (( $# < 2 )); then
          echo "Option '$1' requires a value." >&2
          exit 2
        fi
        ;;
    esac
    case "$1" in
      -n|--namespace) NAMESPACE="$2"; shift 2 ;;
      -c|--cronjob) CRONJOB_NAME="$2"; shift 2 ;;
      -j|--job-prefix) JOB_PREFIX="$2"; shift 2 ;;
      --container) CONTAINER_NAME="$2"; shift 2 ;;
      --interval) SAMPLE_INTERVAL="$2"; shift 2 ;;
      --wait) WAIT_POD_TIMEOUT="$2"; shift 2 ;;
      --tue-sat-only) TUE_SAT_ONLY="true"; shift ;;
      -h|--help) usage 0 ;;
      *) echo "Unknown arg: $1" >&2; usage ;;
    esac
  done
}
parse_args "$@"
# ---- Basic validation ----
# Reject the untouched placeholder defaults as well as empty values.
if [[ -z "$NAMESPACE" || "$NAMESPACE" == "your-namespace" ]]; then
  echo "[ERROR $(ts)] Please set a real namespace via -n/--namespace." >&2
  exit 2
fi
if [[ -z "$CRONJOB_NAME" || "$CRONJOB_NAME" == "your-cronjob-name" ]]; then
  echo "[ERROR $(ts)] Please set a real CronJob name via -c/--cronjob." >&2
  exit 2
fi
# SAMPLE_INTERVAL is handed to `sleep` and WAIT_POD_TIMEOUT is used in shell
# arithmetic, so catch non-numeric values here with a clear message instead
# of failing later in the middle of a run.
interval_re='^[0-9]+([.][0-9]+)?$'
zero_re='^[0.]+$'
timeout_re='^(0|[1-9][0-9]*)$'
if ! [[ "$SAMPLE_INTERVAL" =~ $interval_re ]] || [[ "$SAMPLE_INTERVAL" =~ $zero_re ]]; then
  echo "[ERROR $(ts)] --interval must be a positive number of seconds (got '$SAMPLE_INTERVAL')." >&2
  exit 2
fi
if ! [[ "$WAIT_POD_TIMEOUT" =~ $timeout_re ]]; then
  echo "[ERROR $(ts)] --wait must be a whole number of seconds (got '$WAIT_POD_TIMEOUT')." >&2
  exit 2
fi
# ---- Cluster reachability & object checks ----
# Three probes, each with its own exit status: API server reachable (3),
# namespace readable (4), CronJob present in that namespace (5).
"$KUBECTL" version --request-timeout=5s >/dev/null 2>&1 || {
  echo "[ERROR $(ts)] kubectl cannot connect to the cluster." >&2
  exit 3
}
"$KUBECTL" get ns "$NAMESPACE" >/dev/null 2>&1 || {
  echo "[ERROR $(ts)] Namespace '$NAMESPACE' not found or not permitted." >&2
  exit 4
}
"$KUBECTL" -n "$NAMESPACE" get cronjob "$CRONJOB_NAME" >/dev/null 2>&1 || {
  echo "[ERROR $(ts)] CronJob '$CRONJOB_NAME' not found in namespace '$NAMESPACE'." >&2
  exit 5
}
# ---- Optional day gating (Tue–Sat) ----
# With --tue-sat-only the run is skipped (successfully) on Sunday and Monday.
if [[ "$TUE_SAT_ONLY" == "true" ]]; then
  dow=$(date +%u) # ISO weekday number: 1=Mon..7=Sun
  case "$dow" in
    1|7)
      echo "[INFO $(ts)] Skipping run (Sun/Mon) due to --tue-sat-only."
      exit 0
      ;;
  esac
fi
# ---- Prepare run folder & filenames ----
# One folder per run, named <timestamp>_<job prefix>, holding the Pod log and
# the resource CSV. The CSV is (re)created with only its header row.
TIMESTAMP="$(date +'%Y%m%d-%H%M%S')"
: "${JOB_PREFIX:=$CRONJOB_NAME}"
RUN_DIR="${OUTPUT_ROOT}/${TIMESTAMP}_${JOB_PREFIX}"
POD_LOG="${RUN_DIR}/pod.log"
RES_CSV="${RUN_DIR}/resources.csv"
mkdir -p "$RUN_DIR"
echo "[INFO $(ts)] Output folder: $RUN_DIR"
printf '%s\n' "ts_utc,container,cpu_m,mem_bytes,node_cpu_pct,node_mem_bytes" > "$RES_CSV"
# ---- Create Job from CronJob ----
# The Job name is <prefix>-<timestamp>; the same timestamp names the run folder.
JOB_NAME="${JOB_PREFIX}-${TIMESTAMP}"
echo "[INFO $(ts)] Creating Job from CronJob '$CRONJOB_NAME' in namespace '$NAMESPACE'..." | tee -a "$POD_LOG"
"$KUBECTL" -n "$NAMESPACE" create job --from="cronjob/${CRONJOB_NAME}" "$JOB_NAME" >/dev/null || {
  echo "[ERROR $(ts)] Failed to create Job (name collision or RBAC?)." | tee -a "$POD_LOG"
  exit 6
}
# ---- Wait for the Job's Pod to appear ----
# Poll once per second for a Pod labelled job-name=<JOB_NAME> until one shows
# up or WAIT_POD_TIMEOUT seconds have elapsed (exit status 7 on timeout).
echo "[INFO $(ts)] Waiting for Pod (label job-name=$JOB_NAME) ..." | tee -a "$POD_LOG"
POD_NAME=""
DEADLINE=$(( $(date +%s) + WAIT_POD_TIMEOUT ))
while [[ -z "$POD_NAME" && $(date +%s) -lt $DEADLINE ]]; do
  # The query may fail while no Pod exists yet; errors are discarded and an
  # empty result simply means "not there yet".
  POD_NAME="$("$KUBECTL" -n "$NAMESPACE" get pods -l "job-name=${JOB_NAME}" \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "$POD_NAME" ]]; then
    sleep 1
  fi
done
if [[ -z "$POD_NAME" ]]; then
  echo "[ERROR $(ts)] Timed out waiting for Pod." | tee -a "$POD_LOG"
  exit 7
fi
# The node name stays empty (shown as "unknown") if the lookup fails or the
# Pod has no node assigned at this moment.
NODE_NAME="$("$KUBECTL" -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"
echo "[INFO $(ts)] Pod: $POD_NAME | Node: ${NODE_NAME:-unknown}" | tee -a "$POD_LOG"
# ---- Decide which container to log ----
# Without --container, use the first container listed in the Pod spec; the
# name stays empty if that lookup fails.
[[ -n "$CONTAINER_NAME" ]] || CONTAINER_NAME="$(
  "$KUBECTL" -n "$NAMESPACE" get pod "$POD_NAME" \
    -o jsonpath='{.spec.containers[0].name}' 2>/dev/null || true
)"
echo "[INFO $(ts)] Monitoring container: ${CONTAINER_NAME:-<first-container>}" | tee -a "$POD_LOG"
# ---- Check Metrics API availability (best-effort) ----
# `kubectl top pod` can fail for a Pod that was created moments ago even when
# the Metrics API is installed (no sample exists for it yet), so a failure
# there is only treated as "no metrics" if the metrics.k8s.io API group
# itself cannot be reached either.
METRICS_OK="true"
if ! "$KUBECTL" -n "$NAMESPACE" top pod "$POD_NAME" >/dev/null 2>&1 \
  && ! "$KUBECTL" get --raw "/apis/metrics.k8s.io/v1beta1" >/dev/null 2>&1; then
  echo "[WARN $(ts)] Metrics API not available; resources.csv will be minimal." | tee -a "$POD_LOG"
  METRICS_OK="false"
fi
# ---- Background 1: stream pod logs into pod.log ----
{
  echo "[INFO $(ts)] --- BEGIN POD LOG STREAM ---"
  # `kubectl logs` is refused while the container has not started, so hold
  # off until the Pod is no longer Pending (a failed lookup ends the wait).
  while [[ "$("$KUBECTL" -n "$NAMESPACE" get pod "$POD_NAME" \
    -o jsonpath='{.status.phase}' 2>/dev/null || true)" == "Pending" ]]; do
    sleep 1
  done
  log_args=(-n "$NAMESPACE" logs -f "$POD_NAME")
  if [[ -n "$CONTAINER_NAME" ]]; then
    log_args+=(-c "$CONTAINER_NAME")
  fi
  log_args+=(--timestamps)
  # Keep going on error so the END marker is still written under `set -e`.
  "$KUBECTL" "${log_args[@]}" || true
  echo "[INFO $(ts)] --- END POD LOG STREAM ---"
} >> "$POD_LOG" 2>&1 &
LOG_PID=$!
# ---- Background 2: sample consolidated resource usage into resources.csv ----
# When sample_loop runs as a background job it has its own copy of this flag
# and is ended by a signal from the parent; the flag lets an in-process
# caller stop the loop.
stop_sampling="false"

#######################################
# Convert a Kubernetes quantity such as "256Mi" or "2G" to bytes.
# Arguments: $1 - quantity with a Ki/Mi/Gi/Ti (binary) or K/M/G/T (decimal)
#                 suffix
# Outputs:   the byte count on stdout; the input is echoed back unchanged
#            when it has no known suffix or a non-integer numeric part.
#######################################
_quantity_to_bytes() {
  local raw="$1" num="" mult=1
  case "$raw" in
    *Ki) num="${raw%Ki}"; mult=$(( 1024 )) ;;
    *Mi) num="${raw%Mi}"; mult=$(( 1024 * 1024 )) ;;
    *Gi) num="${raw%Gi}"; mult=$(( 1024 * 1024 * 1024 )) ;;
    *Ti) num="${raw%Ti}"; mult=$(( 1024 * 1024 * 1024 * 1024 )) ;;
    *K) num="${raw%K}"; mult=$(( 1000 )) ;;
    *M) num="${raw%M}"; mult=$(( 1000 * 1000 )) ;;
    *G) num="${raw%G}"; mult=$(( 1000 * 1000 * 1000 )) ;;
    *T) num="${raw%T}"; mult=$(( 1000 * 1000 * 1000 * 1000 )) ;;
    *) printf '%s' "$raw"; return 0 ;;
  esac
  if [[ "$num" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    printf '%s' "$(( 10#$num * mult ))"
  else
    printf '%s' "$raw"
  fi
}

#######################################
# Append one CSV row per SAMPLE_INTERVAL to RES_CSV until stop_sampling is
# "true". Row format: ts_utc,container,cpu_m,mem_bytes,node_cpu_pct,node_mem_bytes.
# Fields that cannot be obtained are left empty; a failing `kubectl top`
# never ends the loop.
# Globals:   KUBECTL, NAMESPACE, POD_NAME, CONTAINER_NAME, METRICS_OK,
#            RES_CSV, SAMPLE_INTERVAL, stop_sampling (read);
#            NODE_NAME (read; filled in here if it is still empty)
# Arguments: none
#######################################
sample_loop() {
  local ts_utc container cpu_m mem_b node_cpu node_mem_b
  local line nline cpu_raw mem_raw node_cpu_raw node_mem_raw
  while [[ "$stop_sampling" != "true" ]]; do
    ts_utc="$(tsz)"
    container=""; cpu_m=""; mem_b=""; node_cpu=""; node_mem_b=""
    if [[ "$METRICS_OK" == "true" ]]; then
      # `kubectl top pod --containers` rows: <pod> <container> <cpu> <mem>.
      # Take the row of the monitored container (first data row when no
      # container name is set). `|| true` keeps whatever was captured and
      # stops errexit/pipefail from ending the sampler on a failed query.
      line="$("$KUBECTL" -n "$NAMESPACE" top pod "$POD_NAME" --containers 2>/dev/null \
        | awk -v want="${CONTAINER_NAME:-}" \
          '!found && NR > 1 && (want == "" || $2 == want) { print; found = 1 }')" || true
      if [[ -n "$line" ]]; then
        read -r _ container cpu_raw mem_raw _ <<<"$line"
        # CPU → millicores: "250m" -> 250, otherwise cores ("1" -> 1000).
        if [[ "$cpu_raw" == *m ]]; then
          cpu_m="${cpu_raw%m}"
        else
          cpu_m="$(awk -v v="$cpu_raw" 'BEGIN { printf "%.0f", v * 1000 }')"
        fi
        mem_b="$(_quantity_to_bytes "$mem_raw")"
      fi
      # The node may not have been known when the Pod was first seen.
      if [[ -z "${NODE_NAME:-}" ]]; then
        NODE_NAME="$("$KUBECTL" -n "$NAMESPACE" get pod "$POD_NAME" \
          -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"
      fi
      if [[ -n "${NODE_NAME:-}" ]]; then
        nline="$("$KUBECTL" top node "$NODE_NAME" 2>/dev/null | awk 'NR == 2')" || true
        if [[ -n "$nline" ]]; then
          # columns: NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
          # -> field 3 is the CPU percentage, field 4 the memory in bytes.
          read -r _ _ node_cpu_raw node_mem_raw _ <<<"$nline"
          node_cpu="${node_cpu_raw%\%}"
          node_mem_b="$(_quantity_to_bytes "$node_mem_raw")"
        fi
      fi
    fi
    printf '%s,%s,%s,%s,%s,%s\n' \
      "$ts_utc" "$container" "$cpu_m" "$mem_b" "$node_cpu" "$node_mem_b" >> "$RES_CSV"
    sleep "$SAMPLE_INTERVAL"
  done
}
# Launch the resource sampler in the background and remember its PID.
sample_loop &
SAMP_PID=$!
# ---- Wait until pod completes (Succeeded/Failed) ----
# Poll the Pod phase every 3 seconds. A failed lookup is recorded as
# "NotFound" and also ends the wait.
PHASE=""
while :; do
  PHASE="$("$KUBECTL" -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")"
  case "$PHASE" in
    Succeeded|Failed|NotFound) break ;;
  esac
  sleep 3
done
# ---- Stop background tasks ----
# Signal both helpers, then reap them; every failure here is ignored because
# either helper may already have exited on its own.
stop_sampling="true"
for bg_pid in "$SAMP_PID" "$LOG_PID"; do
  kill "$bg_pid" >/dev/null 2>&1 || true
done
wait "$SAMP_PID" "$LOG_PID" 2>/dev/null || true
# ---- Final status lines in pod.log & completion ----
# Exit status: 0 only when the Pod reached Succeeded, 1 when it Failed, and
# 8 for anything else (e.g. "NotFound" after a failed lookup), because in
# that case the outcome of the Job is unknown and must not be reported as a
# success. The output location is printed on every path.
echo "[INFO $(ts)] Pod finished with phase: $PHASE" | tee -a "$POD_LOG"
case "$PHASE" in
  Succeeded)
    echo "[OK $(ts)] Job completed successfully." | tee -a "$POD_LOG"
    final_rc=0
    ;;
  Failed)
    echo "[ERROR $(ts)] Job ended with Failed state." | tee -a "$POD_LOG"
    final_rc=1
    ;;
  *)
    echo "[ERROR $(ts)] Pod ended in unexpected state '${PHASE}'; Job result is unknown." | tee -a "$POD_LOG"
    final_rc=8
    ;;
esac
echo "[INFO $(ts)] Output saved to: $RUN_DIR" | tee -a "$POD_LOG"
exit "$final_rc"
# (web-page residue, not part of the script) For further actions, you may consider blocking this person and/or reporting abuse
# (web-page residue, not part of the script) Top comments (0)