#!/bin/bash
# =============================================================
# Cronjob_monitorpod.sh
# Manually trigger a CronJob, track the newest pod for its Job,
# collect pod logs + periodic CPU/MEM. Additionally, whenever a
# log line contains "/app/ShieldCache", we take an immediate
# CPU/MEM snapshot and append it to the SAME RESOURCE_CSV.
# =============================================================
set -euo pipefail
# --------- EDIT THESE ----------
NAMESPACE="onprem" # namespace where the CronJob lives
CRONJOB_NAME="teams-chat" # CronJob to trigger
JOB_NAME_BASE="teams-chat" # base Job name; script adds timestamp
# -------------------------------
# Paths: absolute location of this script and the directory it lives in.
SCRIPT="$(realpath -s "$0")"
SCRIPTPATH="$(dirname "$SCRIPT")"
# Timestamps: one safe for K8s names (hyphens), one for files (underscores OK).
# The file variant is derived from the K8s variant instead of calling `date`
# a second time, so the two can never disagree across a second boundary.
TIMESTAMP_K8S="$(date +'%Y%m%d-%H%M%S')" # safe for k8s object names
TIMESTAMP_FILE="${TIMESTAMP_K8S/-/_}" # file-friendly
# RFC1123-safe Job name: upper-case letters are lowered, underscores become hyphens.
JOB_NAME="$(echo "${JOB_NAME_BASE}-${TIMESTAMP_K8S}" | tr '[:upper:]_' '[:lower:]-')"
# Output directories.
LOG_DIR="$SCRIPTPATH/Logs"
LASTLOG_DIR="$SCRIPTPATH/Lastlog"
# The log directory is mandatory: every later `tee -a "$LOG_PATH"` would fail
# without it, so report the problem here instead of hiding it.
if ! mkdir -p "$LOG_DIR"; then
  echo "[ERROR] Cannot create log directory '$LOG_DIR'." >&2
  exit 1
fi
# The Lastlog copy is best-effort everywhere in this script, so only warn.
mkdir -p "$LASTLOG_DIR" 2>/dev/null \
  || echo "[WARN] Cannot create '$LASTLOG_DIR'; lastpodlog.txt will not be updated." >&2
# Files written under LOG_DIR (a copy of the log also goes to LASTLOG_DIR/lastpodlog.txt)
LOG_PATH="$LOG_DIR/${TIMESTAMP_FILE}-${JOB_NAME_BASE}.log"
RESOURCE_CSV="$LOG_DIR/${TIMESTAMP_FILE}-${JOB_NAME_BASE}_pod_resources.csv"
# Keyword checks (customize)
ERROR_KEYWORDS=("java.io.EOFException" "NoSuchMethodError" "CrashLoopBackOff" "Error")
SUCCESS_KEYWORDS=("TGM Policy Ended" "Job completed" "Completed successfully")
# Print the current local time as "YYYY-MM-DD HH:MM:SS"; used as the prefix
# of every log line this script emits.
ts() {
  local stamp_format='%Y-%m-%d %H:%M:%S'
  date "+${stamp_format}"
}
echo "$(ts) [INFO] Logs: $LOG_PATH"
echo "$(ts) [INFO] Namespace: $NAMESPACE | CronJob: $CRONJOB_NAME | Job: $JOB_NAME" | tee -a "$LOG_PATH"
# --- Validate namespace ---
if ! kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
  echo "$(ts) [ERROR] Namespace '$NAMESPACE' not found." | tee -a "$LOG_PATH"
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi
# --- Create Job from CronJob ---
echo "$(ts) [INFO] Creating Job '$JOB_NAME' from CronJob '$CRONJOB_NAME'..." | tee -a "$LOG_PATH"
# kubectl's output is captured rather than discarded so that, on failure, the
# real reason is written to the log instead of leaving the operator to guess.
if ! create_output="$(kubectl -n "$NAMESPACE" create job --from=cronjob/"$CRONJOB_NAME" "$JOB_NAME" 2>&1)"; then
  echo "$(ts) [ERROR] Failed to create Job from CronJob (name collision? RBAC?)." | tee -a "$LOG_PATH"
  if [[ -n "$create_output" ]]; then
    echo "$(ts) [ERROR] kubectl: $create_output" | tee -a "$LOG_PATH"
  fi
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi
# --- Find newest pod for this Job ---
# Prints the name of the most recently created pod labelled
# job-name=$JOB_NAME in $NAMESPACE (pods are sorted by creation timestamp and
# the last one is taken). Prints nothing when no pod matches or kubectl fails.
# Globals: NAMESPACE, JOB_NAME (read)
# Returns: always 0. The script runs with errexit + pipefail, so a non-zero
#          status here would make the caller's POD_NAME="$(get_newest_pod)"
#          abort the whole script on a transient kubectl error instead of
#          retrying.
get_newest_pod() {
  kubectl -n "$NAMESPACE" get pods -l "job-name=${JOB_NAME}" \
    --sort-by=.metadata.creationTimestamp \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null \
    | tail -n 1 \
    || true
}
echo "$(ts) [INFO] Waiting for pod for Job '$JOB_NAME'..." | tee -a "$LOG_PATH"
# Poll every 10 seconds until a pod carrying this Job's label shows up.
POD_NAME="$(get_newest_pod)"
until [[ -n "$POD_NAME" ]]; do
  echo "$(ts) [INFO] Not yet created… retry in 10s"
  sleep 10
  POD_NAME="$(get_newest_pod)"
done
# The node name is informational only; an empty value is reported as "unknown".
NODE_NAME="$(kubectl -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"
echo "$(ts) [INFO] Pod: $POD_NAME | Node: ${NODE_NAME:-unknown}" | tee -a "$LOG_PATH"
# --- Prepare CSV (same file for periodic samples AND ShieldCache hits) ---
echo "timestamp,pod,cpu,mem" > "$RESOURCE_CSV"
# --- Background watcher: on /app/ShieldCache lines, append a usage row to RESOURCE_CSV ---
# Producer side of the pipeline: `kubectl logs -f` returns an error right away
# while the container is still waiting to start, which would end the watcher
# before it ever saw a log line. Hold off until the pod has left the Pending
# phase (or the phase cannot be read), then replace this subshell with the log
# stream via exec.
{
  while [[ "$(kubectl -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.status.phase}' || true)" == "Pending" ]]; do
    sleep 2
  done
  exec kubectl -n "$NAMESPACE" logs -f "pod/${POD_NAME}" --timestamps=true
} 2>/dev/null | \
  while IFS= read -r line; do
    # Only lines mentioning the ShieldCache path trigger a sample.
    [[ "$line" == *"/app/ShieldCache"* ]] || continue
    now="$(ts)"
    # `|| true`: under errexit + pipefail a failing `kubectl top` (no metrics
    # available for the pod) would otherwise terminate the watcher; an empty
    # sample is recorded instead.
    usage="$(kubectl -n "$NAMESPACE" top pod "$POD_NAME" --no-headers 2>/dev/null | awk '{print $2","$3}' || true)"
    echo "$now,$POD_NAME,${usage:-,}" >> "$RESOURCE_CSV"
    echo "$(ts) [INFO] ShieldCache activity detected; sampled usage: ${usage:-N/A}" >> "$LOG_PATH"
  done &
# For a background pipeline $! is the PID of its last element (the read loop).
CACHE_MON_PID=$!
# Exit handler: stop the watcher loop and publish a copy of the log as
# Lastlog/lastpodlog.txt. Both steps are best-effort.
cleanup() {
  kill "$CACHE_MON_PID" >/dev/null 2>&1 || true
  cp "$LOG_PATH" "$LASTLOG_DIR/lastpodlog.txt" 2>/dev/null || true
}
trap cleanup EXIT
# --- Main monitoring loop: periodic CPU/MEM and phase ---
# Each iteration appends one usage sample (when available) to RESOURCE_CSV and
# then acts on the pod phase: keep waiting, finish, or exit 2 on failure.
echo "$(ts) [INFO] Monitoring pod resource usage… (to $RESOURCE_CSV)" | tee -a "$LOG_PATH"
while true; do
  # Periodic sample (every loop).
  # `|| true` is essential: the script runs with errexit + pipefail, so a
  # non-zero `kubectl top` (e.g. no metrics collected yet for a pod that has
  # only just started) would otherwise abort the whole script right here.
  line="$(kubectl -n "$NAMESPACE" top pod "$POD_NAME" --no-headers 2>/dev/null | awk -v t="$(ts)" '{print t","$1","$2","$3}' || true)"
  if [[ -n "$line" ]]; then
    echo "$line" >> "$RESOURCE_CSV"
  fi
  phase="$(kubectl -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")"
  case "$phase" in
    Running|Pending)
      echo "$(ts) [INFO] Pod $POD_NAME is $phase. Sleeping 30s…" | tee -a "$LOG_PATH"
      sleep 30
      ;;
    Succeeded|Completed)
      echo "$(ts) [INFO] Pod $POD_NAME finished with phase: $phase." | tee -a "$LOG_PATH"
      break
      ;;
    Failed|Error)
      echo "$(ts) [ERROR] Pod $POD_NAME ended with phase: $phase." | tee -a "$LOG_PATH"
      # Collect the pod's output before leaving (best-effort): on this path
      # the later "fetch final logs" step is never reached, and the log of a
      # failed pod is what the operator needs to see.
      kubectl -n "$NAMESPACE" logs "pod/${POD_NAME}" --timestamps=true >> "$LOG_PATH" 2>&1 || true
      exit 2
      ;;
    *)
      # Covers the "Unknown" fallback used when kubectl itself fails, as well
      # as an empty or unexpected phase string.
      echo "$(ts) [WARN] Pod $POD_NAME phase '$phase' (waiting 20s)..." | tee -a "$LOG_PATH"
      sleep 20
      ;;
  esac
done
# --- Fetch final logs once ---
echo "$(ts) [INFO] Fetching final logs for pod $POD_NAME…" | tee -a "$LOG_PATH"
# Best-effort: kubectl's stderr is appended too, so a fetch failure is visible in the log.
kubectl -n "$NAMESPACE" logs "pod/${POD_NAME}" --timestamps=true >> "$LOG_PATH" 2>&1 || true
# --- Keyword scans ---
# Keywords are literal strings, not patterns, so they are matched with -F.
# Without it "java.io.EOFException" is a regex whose dots match any character,
# and a customized keyword such as "[FATAL]" would be read as a bracket
# expression. Matching stays case-insensitive (-i).
# The first error keyword found ends the script with exit status 3.
for kw in "${ERROR_KEYWORDS[@]}"; do
  if grep -qiF -- "$kw" "$LOG_PATH"; then
    echo "$(ts) [ERROR] Error keyword found in logs: '$kw'." | tee -a "$LOG_PATH"
    exit 3
  fi
done
# A missing success keyword only produces a warning; it does not change the exit status.
for kw in "${SUCCESS_KEYWORDS[@]}"; do
  if ! grep -qiF -- "$kw" "$LOG_PATH"; then
    echo "$(ts) [WARN] Success keyword '$kw' not found in logs." | tee -a "$LOG_PATH"
  fi
done
echo "$(ts) [INFO] Done. Files:"
echo " - $LOG_PATH"
echo " - $RESOURCE_CSV"
exit 0
# Updated version of the script (a second, standalone copy) follows.
#!/bin/bash
# Define variables
SCRIPT="$(realpath -s "$0")"
SCRIPTPATH="$(dirname "$SCRIPT")"
NAMESPACE="onprem" # Namespace where the pods are located
CRONJOB_NAME="teams-chat" # Name of the CronJob you want to trigger the job from
JOB_NAME="teams-chat" # Fixed job name
TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" # Timestamp for log file
LOG_PATH="$SCRIPTPATH/Logs/${TIMESTAMP}-${JOB_NAME}.log" # Log file with timestamp
RESOURCE_CSV="$SCRIPTPATH/Logs/${TIMESTAMP}-${JOB_NAME}_pod_resources.csv"
# List of keywords to check in the logs (customize as needed)
ERROR_KEYWORDS=("java.io.EOFException" "NoSuchMethodError")
SUCCESS_KEYWORDS=("TGM Policy Ended")
# Weekday name, compared against the English names "Sunday"/"Monday" further
# down. `date +%A` prints the name in the current locale, so the C locale is
# forced here; otherwise the comparison silently never matches on a host with
# a non-English locale.
DAY_OF_WEEK="$(LC_ALL=C date +%A)"
# The log, the CSV and the lastpodlog.txt copy are written into these
# directories; create them up front so the first `tee -a "$LOG_PATH"` works.
mkdir -p "$SCRIPTPATH/Logs" "$SCRIPTPATH/Lastlog"
# Script help
# Clears the terminal, prints the usage banner on stdout and terminates the
# script with status 0.
# NOTE(review): the banner describes a mandatory pod-name argument, but nothing
# in the visible script reads positional parameters or calls this function;
# confirm whether the text is still accurate.
print_help() {
  local rule="---------------------------------------------------------------"
  clear
  printf '%s\n' \
    "$rule" \
    "Script is used to monitor given POD resources (Memory and CPU)." \
    " " \
    "It must be run with ONE mandatory parameter - name of POD to monitor." \
    "Usage: $0 <pod-name>" \
    "Example:" \
    "$0 smtp-email-1-29002137-fwtqx" \
    'Use "kubectl get pods" command to list available PODs.' \
    "$rule"
  exit 0
}
# Day gating: nothing is delivered on Sunday or Monday, so the import is
# skipped on those days (exit 0); on every other day execution continues.
case "$DAY_OF_WEEK" in
  Sunday|Monday)
    echo "Today is $DAY_OF_WEEK. Import job will be skipped as there is no delivery."
    exit 0
    ;;
  *)
    echo "Today is $DAY_OF_WEEK. Starting import job..."
    ;;
esac
# 1. Validate the namespace exists
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] More detailed logs can be found in $LOG_PATH"
# Guard clause: leave with status 1 when the namespace cannot be read.
kubectl get namespace "$NAMESPACE" >/dev/null 2>&1 || {
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Error: Namespace $NAMESPACE does not exist." | tee -a "$LOG_PATH"
  cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
  exit 1
}
# 2. Create the job manually from the CronJob
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Creating job $JOB_NAME from CronJob $CRONJOB_NAME in namespace $NAMESPACE..." | tee -a "$LOG_PATH"
# kubectl's own output is left visible on the terminal; only its status is tested.
if ! kubectl create job --from=cronjob/"$CRONJOB_NAME" -n "$NAMESPACE" "$JOB_NAME"; then
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Error: Failed to create job from CronJob." | tee -a "$LOG_PATH"
  cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
  exit 1
fi
# 3. Wait for the pod associated with the job to be created
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Waiting for pod associated with job $JOB_NAME to be created..." | tee -a "$LOG_PATH"
# Take the first pod listed for the job label; retry every 10 seconds while
# the list is still empty.
POD_NAME="$(kubectl get pods -n "$NAMESPACE" -l job-name="$JOB_NAME" --no-headers | awk 'NR==1 {print $1; exit}')"
until [[ -n "$POD_NAME" ]]; do
  echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod for job $JOB_NAME not yet created... Retrying in 10 seconds." | tee -a "$LOG_PATH"
  sleep 10
  POD_NAME="$(kubectl get pods -n "$NAMESPACE" -l job-name="$JOB_NAME" --no-headers | awk 'NR==1 {print $1; exit}')"
done
# A pod exists now: look up the node it was assigned to and report both.
NODE_NAME="$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}')"
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod $POD_NAME associated with job $JOB_NAME has been created on node $NODE_NAME." | tee -a "$LOG_PATH"
# 4. Prepare resource usage CSV file for the triggered pod
echo "ts,pod,cpu,mem" > "$RESOURCE_CSV"
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# (ADDED) ShieldCache correlator:
# Start a minimal background watcher that tails the pod logs and whenever
# a line contains "/app/ShieldCache", it takes a one-off CPU/MEM sample
# and appends it to the SAME RESOURCE_CSV in the same 4-column format.
#
# The pod has only just been created at this point. `kubectl logs -f` returns
# an error immediately while the container is still waiting to start, which
# would end the watcher before it saw a single line. The producer side of the
# pipeline therefore waits until the pod has left the Pending phase (or the
# phase cannot be read) and only then execs the log stream.
{
  while [[ "$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.phase}')" == "Pending" ]]; do
    sleep 2
  done
  exec kubectl logs -f "$POD_NAME" -n "$NAMESPACE" --timestamps=true
} 2>/dev/null | \
  while IFS= read -r line; do
    # Only lines mentioning the ShieldCache path trigger a sample.
    [[ "$line" == *"/app/ShieldCache"* ]] || continue
    ts_now="$(date +'%Y-%m-%d %H:%M:%S')"
    # Empty when `kubectl top` has no data; the row then carries blank cpu/mem columns.
    usage_line="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk '{print $2","$3}')"
    echo "$ts_now,$POD_NAME,${usage_line:-,}" >> "$RESOURCE_CSV"
    echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] ShieldCache activity detected; sampled usage appended to CSV." >> "$LOG_PATH"
  done &
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# 5. Wait for the pod to complete (success or failure) and check for "Error" state
# Each iteration appends one usage sample (when available) to RESOURCE_CSV and
# then dispatches on the STATUS column of `kubectl get pods`.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Waiting for pod $POD_NAME to complete..." | tee -a "$LOG_PATH"
while true; do
  # Collect resource usage for the pod on every pass
  USAGE_LINE="$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" '{print ts","$1","$2","$3}')"
  if [[ -n "$USAGE_LINE" ]]; then
    echo "$USAGE_LINE" >> "$RESOURCE_CSV"
  fi
  POD_STATUS="$(kubectl get pods -n "$NAMESPACE" --no-headers | awk -v pod="$POD_NAME" '$1==pod {print $3}')"
  case "$POD_STATUS" in
    Completed)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod $POD_NAME status: $POD_STATUS." | tee -a "$LOG_PATH"
      break
      ;;
    Error|CrashLoopBackOff)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Pod $POD_NAME status: $POD_STATUS." | tee -a "$LOG_PATH"
      # /bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Creating timestamps-true.txt for pod $POD_NAME." | tee -a "$LOG_PATH"
      echo "$(date +'%Y-%m-%d %H:%M:%S') namespace=$NAMESPACE" >> "$SCRIPTPATH/Lastlog/timestamps-true.txt"
      cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
      exit 2
      ;;
    Pending|ContainerCreating|Init:*)
      # The pod was created moments ago and its container has not started.
      # These states used to fall into the catch-all branch below, whose log
      # fetch fails for a container that is not running yet and ended the
      # script with exit 3 before the job had done anything. Keep polling.
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod $POD_NAME has not started yet ($POD_STATUS). Waiting 10 seconds." | tee -a "$LOG_PATH"
      sleep 10
      ;;
    Running|NotReady|Init|PodInitializing)
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Something went wrong? pod status=$POD_STATUS..." | tee -a "$LOG_PATH"
      # /bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
      # NOTE(review): this appends the pod's complete log to LOG_PATH on every
      # pass, so earlier lines are repeated each cycle; confirm that is wanted.
      kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true | tee -a "$LOG_PATH"
      tail -n 100 "$LOG_PATH"
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Pod $POD_NAME is still running ($POD_STATUS). Waiting 30 seconds." | tee -a "$LOG_PATH"
      sleep 30
      ;;
    *)
      # Any other status (including an empty one): take the logs once and
      # stop waiting; exit 3 if even that is not possible.
      echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
      if ! kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true >> "$LOG_PATH" 2>&1; then
        echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Failed to fetch logs for pod $POD_NAME." | tee -a "$LOG_PATH"
        cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
        exit 3
      fi
      break
      ;;
  esac
done
# 6. Fetch logs from the pod (final)
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" -n "$NAMESPACE" --timestamps=true | tee -a "$LOG_PATH"
# After a pipeline $? holds the status of its LAST command (tee), so testing
# $? never noticed a failing kubectl. PIPESTATUS[0] is kubectl's own status;
# it must be read immediately, before another command overwrites it.
fetch_status=${PIPESTATUS[0]}
if [[ $fetch_status -ne 0 ]]; then
  echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Failed to fetch logs for pod $POD_NAME." | tee -a "$LOG_PATH"
  cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
  exit 3
fi
# 7. Grep the logs for any error-related keywords
# Keywords are literal strings, so they are matched with -F (fixed string):
# as a regex the dots in "java.io.EOFException" match any character, and a
# customized keyword containing [ ] * or similar would change meaning. `--`
# protects keywords that start with a dash. Matching stays case-insensitive.
# The first error keyword found ends the script with exit status 4.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Checking logs for errors or exceptions..." | tee -a "$LOG_PATH"
for keyword in "${ERROR_KEYWORDS[@]}"; do
  if grep -iqF -- "$keyword" "$LOG_PATH"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR] Error found in logs: Keyword '$keyword' found. Exiting job as failed." | tee -a "$LOG_PATH"
    # /bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
    tail -n 500 "$LOG_PATH"
    exit 4
  fi
done
# 8. Grep the logs for success-related keywords and ensure none are missing
# Every success keyword must be present; the first missing one ends the
# script with exit status 5.
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Checking logs for success-related keywords..." | tee -a "$LOG_PATH"
for keyword in "${SUCCESS_KEYWORDS[@]}"; do
  if ! grep -iqF -- "$keyword" "$LOG_PATH"; then
    echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Failure: Keyword '$keyword' not found in logs. Exiting job as failed." | tee -a "$LOG_PATH"
    # /bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
    cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
    tail -n 500 "$LOG_PATH"
    exit 5
  fi
done
# 9. Output the last 500 lines of the log file to the terminal (execution agent)
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Displaying the last 500 lines of the log file:" | tee -a "$LOG_PATH"
tail -n 500 "$LOG_PATH"
# 10. Job completed successfully
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO] Job $JOB_NAME completed successfully." | tee -a "$LOG_PATH"
# Publish the copy only after the final line has been logged, as every error
# path does; copying first left lastpodlog.txt without the success message.
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt" 2>/dev/null || true
exit 0
# Top comments (0)
# Subscribe
# For further actions, you may consider blocking this person and/or reporting abuse
# We're a place where coders share, stay up-to-date and grow their careers.
# Top comments (0)