# 3a. Prepare resource usage CSV file for the triggered pod
OUTPUT_DIR="$SCRIPTPATH/output"
mkdir -p "$OUTPUT_DIR"
RESOURCE_CSV="$OUTPUT_DIR/${JOB_NAME}_pod_resources.csv"
echo "podname,cpu_usage,memory_usage,timestamp" > "$RESOURCE_CSV"
# >>> BEGIN add ShieldCache df CSV (minimal)
SC_DF_CSV="$OUTPUT_DIR/${JOB_NAME}_shieldcache_df.csv"
echo "timestamp,pod,filesystem,size,used,avail,use_pct,mountpoint" > "$SC_DF_CSV"
# <<< END add ShieldCache df CSV (minimal)
Next, inside the pod-monitoring loop, take a df snapshot of /app/ShieldCache on each pass:
while true; do
USAGE_LINE=$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | \
awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" '{print $1","$2","$3","ts}')
if [ -n "$USAGE_LINE" ]; then
echo "$USAGE_LINE" >> "$RESOURCE_CSV"
fi
# >>> BEGIN add ShieldCache df sample (minimal)
DF_LINE=$(kubectl -n "$NAMESPACE" exec "$POD_NAME" -- sh -c 'df -hP /app/ShieldCache | tail -n +2' 2>/dev/null | \
awk -v ts="$(date +"%Y-%m-%d %H:%M:%S")" -v pod="$POD_NAME" '{printf "%s,%s,%s,%s,%s,%s,%s,%s\n", ts,pod,$1,$2,$3,$4,$5,$6}')
if [ -n "$DF_LINE" ]; then
echo "$DF_LINE" >> "$SC_DF_CSV"
fi
# <<< END add ShieldCache df sample (minimal)
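Each pass of the loop appends one row per CSV. After a run, a quick way to eyeball the ShieldCache snapshots (the sample row below is illustrative, not taken from a real run):

column -t -s, "$SC_DF_CSV"
# timestamp            pod                                filesystem  size  used  avail  use_pct  mountpoint
# 2025-01-14 03:10:42  teams-chat-20250114-030958-abcde   /dev/sdb1   50G   12G   38G    25%      /app/ShieldCache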
The full updated script:
#!/bin/bash
# Define variables
SCRIPT=$(realpath -s "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
NAMESPACE="onprem" # Namespace where the pods are located
CRONJOB_NAME="teams-chat" # Name of the CronJob you want to trigger the job from
JOB_NAME="teams-chat-$(date +'%Y%m%d-%H%M%S')" # Unique, timestamped job name
TIMESTAMP=$(date +'%Y-%m-%d_%H%M%S') # Timestamp for log file
LOG_PATH="$SCRIPTPATH/Logs/${TIMESTAMP}-${JOB_NAME}.log" # Log file with timestamp
mkdir -p "$SCRIPTPATH/Logs" "$SCRIPTPATH/Lastlog" # Make sure the log directories exist before the first tee/cp
# List of keywords to check in the logs (customize as needed)
ERROR_KEYWORDS=("java.io.EOFException" "NoSuchMethodError")
SUCCESS_KEYWORDS=("TGM Policy Ended")
DAY_OF_WEEK=$(date +%A)
if [[ "$DAY_OF_WEEK" != "Sunday" && "$DAY_OF_WEEK" != "Monday" ]]; then
echo "Today is $DAY_OF_WEEK. Starting import job..."
else
echo "Today is $DAY_OF_WEEK. Import job will be skipped as there is no delivery."
exit 0
fi
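# Note: "date +%A" returns a locale-dependent day name; on a non-English locale the
# comparison above never matches "Sunday"/"Monday" and the job would always run.
# A locale-proof sketch would compare the numeric day instead, e.g.:
#   DOW=$(date +%u)   # 1 = Monday ... 7 = Sunday
#   if [[ "$DOW" != "7" && "$DOW" != "1" ]]; then echo "Starting import job..."; fi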
# 1. Validate the namespace exists
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] More detailed logs can be found in $LOG_PATH" | tee -a "$LOG_PATH"
if ! kubectl get namespace "$NAMESPACE" &>/dev/null; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Error: Namespace $NAMESPACE does not exist." | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 1
fi
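# Optional pre-check (a minimal sketch): fail fast if the CronJob itself is missing,
# so the create step below logs a clear error instead of a bare kubectl failure.
if ! kubectl get cronjob "$CRONJOB_NAME" -n "$NAMESPACE" &>/dev/null; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Error: CronJob $CRONJOB_NAME not found in namespace $NAMESPACE." | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 1
fi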
# 2. Create the job manually from the CronJob
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Creating job $JOB_NAME from CronJob $CRONJOB_NAME in namespace $NAMESPACE..." | tee -a "$LOG_PATH"
kubectl create job --from="cronjob/$CRONJOB_NAME" -n "$NAMESPACE" "$JOB_NAME"
if [[ $? -ne 0 ]]; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Error: Failed to create job from CronJob." | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 1
fi
# 3. Wait for the pod associated with the job to be created
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Waiting for pod associated with job $JOB_NAME to be created..." | tee -a "$LOG_PATH"
while true; do
POD_NAME=$(kubectl get pods -n "$NAMESPACE" -l job-name="$JOB_NAME" --no-headers 2>/dev/null | awk '{print $1}' | head -n 1)
if [ -n "$POD_NAME" ]; then
NODE_NAME=$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}')
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Pod $POD_NAME associated with job $JOB_NAME has been created on node $NODE_NAME" | tee -a "$LOG_PATH"
break
fi
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Pod for job $JOB_NAME not yet created... Retrying in 10 seconds." | tee -a "$LOG_PATH"
sleep 10
done
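# Note: the resource sampling below relies on "kubectl top pod", which needs the
# metrics-server add-on. If metrics are unavailable, the 2>/dev/null in the sampling
# command leaves the CPU/memory CSV empty (header only); the df CSV is unaffected.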
# 3a. Prepare resource usage CSV file for the triggered pod
OUTPUT_DIR="$SCRIPTPATH/output"
mkdir -p "$OUTPUT_DIR"
RESOURCE_CSV="$OUTPUT_DIR/${JOB_NAME}_pod_resources.csv"
echo "podname,cpu_usage,memory_usage,timestamp" > "$RESOURCE_CSV"
# >>> NEW (ShieldCache df): add a second CSV for /app/ShieldCache filesystem snapshots
SC_DF_CSV="$OUTPUT_DIR/${JOB_NAME}_shieldcache_df.csv"
echo "timestamp,pod,filesystem,size,used,avail,use_pct,mountpoint" > "$SC_DF_CSV"
# <<< NEW (ShieldCache df)
# 4. Wait for the pod to complete (success or failure) and check for "Error" state
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Waiting for pod $POD_NAME to complete..." | tee -a "$LOG_PATH"
while true; do
# Collect resource usage for the pod every 30 seconds
USAGE_LINE=$(kubectl top pod "$POD_NAME" -n "$NAMESPACE" --no-headers 2>/dev/null | awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" '{print $1","$2","$3","ts}')
if [ -n "$USAGE_LINE" ]; then
echo "$USAGE_LINE" >> "$RESOURCE_CSV"
fi
# >>> NEW (ShieldCache df): sample df -hP /app/ShieldCache into the second CSV
DF_LINE=$(kubectl -n "$NAMESPACE" exec "$POD_NAME" -- sh -c "df -hP /app/ShieldCache | tail -n +2" 2>/dev/null | \
awk -v ts="$(date +'%Y-%m-%d %H:%M:%S')" -v pod="$POD_NAME" '{printf "%s,%s,%s,%s,%s,%s,%s,%s\n", ts,pod,$1,$2,$3,$4,$5,$6}')
if [ -n "$DF_LINE" ]; then
echo "$DF_LINE" >> "$SC_DF_CSV"
fi
# <<< NEW (ShieldCache df)
POD_STATUS=$(kubectl get pods -n "$NAMESPACE" --no-headers | awk -v pod="$POD_NAME" '$1==pod {print $3}')
if [[ "$POD_STATUS" == "Completed" ]]; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Pod $POD_NAME is in $POD_STATUS status." | tee -a "$LOG_PATH"
break
elif [[ "$POD_STATUS" == "Error" || "$POD_STATUS" == "CrashLoopBackOff" ]]; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Pod $POD_NAME is in status $POD_STATUS." | tee -a "$LOG_PATH"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Calling external script to send a notification." | tee -a "$LOG_PATH"
/bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" --timestamps=true -n "$NAMESPACE" >> "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 2
elif [[ "$POD_STATUS" == "Running" || "$POD_STATUS" == "NotReady" || "$POD_STATUS" == "Init" || "$POD_STATUS" == "PodInitializing" ]]; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Pod $POD_NAME is still running ($POD_STATUS)... Waiting 30 seconds." | tee -a "$LOG_PATH"
sleep 30
else
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Pod $POD_NAME is in unexpected status '$POD_STATUS'." | tee -a "$LOG_PATH"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" --timestamps=true -n "$NAMESPACE" >> "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 3
fi
done
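# Optional summary (a minimal sketch): surface the collected sample count and the last
# ShieldCache snapshot in the main log, so they are visible without opening the CSVs.
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Collected $(($(wc -l < "$RESOURCE_CSV") - 1)) resource samples in $RESOURCE_CSV" | tee -a "$LOG_PATH"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Last ShieldCache usage: $(tail -n 1 "$SC_DF_CSV")" | tee -a "$LOG_PATH"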
# 5. Fetch logs from the pod
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Fetching logs for pod $POD_NAME..." | tee -a "$LOG_PATH"
kubectl logs "$POD_NAME" --timestamps=true -n "$NAMESPACE" >> "$LOG_PATH"
if [ $? -ne 0 ]; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Failed to fetch logs for pod $POD_NAME." | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
exit 3
fi
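# Note: "kubectl logs POD" returns only the default container's log; if the job pod ever
# runs more than one container, add --all-containers=true (or -c CONTAINER_NAME) here and
# in the earlier log-fetching branches.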
# 6. Grep the logs for any error-related keywords
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Checking logs for errors or exceptions..." | tee -a "$LOG_PATH"
for keyword in "${ERROR_KEYWORDS[@]}"; do
if grep -iqF "$keyword" "$LOG_PATH"; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Error keyword '$keyword' found in logs. Exiting job as failed." | tee -a "$LOG_PATH"
/bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
tail -n 500 "$LOG_PATH"
exit 4
fi
done
# 7. Grep the logs for success-related keywords and ensure none are missing
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Checking logs for success-related keywords..." | tee -a "$LOG_PATH"
for keyword in "${SUCCESS_KEYWORDS[@]}"; do
if ! grep -iqF "$keyword" "$LOG_PATH"; then
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] Failure: success keyword '$keyword' not found in logs. Exiting job as failed." | tee -a "$LOG_PATH"
/bin/bash "$SCRIPTPATH/send_SMS_alert.sh" | tee -a "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
tail -n 500 "$LOG_PATH"
exit 5
fi
done
# 8. Output the last 500 lines of the log file to the terminal (execution agent)
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Displaying the last 500 lines of the log file:" | tee -a "$LOG_PATH"
tail -n 500 "$LOG_PATH"
cp "$LOG_PATH" "$SCRIPTPATH/Lastlog/lastpodlog.txt"
# 9. Job completed successfully
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] Job $JOB_NAME completed successfully." | tee -a "$LOG_PATH"
exit 0
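If this wrapper is itself scheduled from cron on the machine that has kubectl access, an entry along these lines would run it nightly at 02:00 (the paths below are placeholders, not the real install location):

0 2 * * * /path/to/trigger-teams-chat-job.sh >> /path/to/trigger-teams-chat-job.cron.log 2>&1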