Detect hidden risks before sharing files — metadata, secrets, and more.
In this tutorial, we'll build a desktop privacy scanner using Python.
It helps you analyze files and detect:
- Sensitive filenames (passwords, API keys…)
- 📸 Image metadata (GPS location, device info)
- PDF author info
- Hidden Excel sheets
- 📦 Suspicious ZIP contents
🧰 1. Install Dependencies
First, install required libraries:
pip install pillow pypdf openpyxl ttkbootstrap tkinterdnd2
2. Basic App Setup
We start by creating the main window using tkinter and ttkbootstrap.
import tkinter as tk
import ttkbootstrap as tb
from tkinterdnd2 import TkinterDnD
# Application identity; both values are shown in the window title.
APP_NAME = "File Share Safety Checker PRO"
APP_VERSION = "3.5.0"
# The root window is created through TkinterDnD so it can take part in drag & drop.
app = TkinterDnD.Tk()
app.title(f"{APP_NAME} v{APP_VERSION}")
# Initial window size in pixels (width x height).
app.geometry("1400x780")
# Activate the ttkbootstrap "darkly" theme.
tb.Style("darkly")
💡 Explanation
TkinterDnD.Tk() → enables drag & drop
ttkbootstrap → modern UI theme
geometry() → sets the window size
📦 3. App State (Core Variables)
We store files, results, and UI communication here:
from queue import Queue
import threading
# Paths queued for scanning, in the order they were added.
file_list = []
# Maps file path -> {"score": int, "issues": [str]}; filled in by add_issue().
results = {}
# (command, payload) messages sent from the scan to the UI; drained by process_queue().
ui_queue = Queue()
# Scan progress as a percentage, set from "progress" messages.
progress_var = tk.IntVar(value=0)
# Sharing context; finalize_score() scales the score down when this is "INTERNAL".
context_mode = tk.StringVar(value="PUBLIC")
# NOTE(review): nothing in the visible code sets or checks this event — confirm where it is used.
cancel_flag = threading.Event()
💡 Why this matters
results → stores scan findings
ui_queue → thread-safe UI updates
context_mode → affects risk scoring
⚠️ 4. Risk Engine (Scoring System)
This is the brain of the app.
def add_issue(file, score, msg):
    """Record one finding for *file*.

    The entry in the module-level ``results`` dict is created the first time
    a file is seen; afterwards *score* is added to its running total and
    *msg* is appended to its list of issues.
    """
    entry = results.setdefault(file, {"score": 0, "issues": []})
    entry["score"] += score
    entry["issues"].append(msg)
💡 What it does
Adds issues per file
Increases the file's risk score
Final Score Calculation
def finalize_score(file):
    """Turn the accumulated score of *file* into a ``(score, risk)`` pair.

    The raw score is capped at 100 and, when ``context_mode`` is
    ``"INTERNAL"``, scaled by 0.7 (truncated to an int). A file that has no
    entry in ``results`` — i.e. nothing was found — is treated as score 0
    instead of raising ``KeyError``, which makes the "OK" level reachable.

    Returns:
        tuple: ``(score, risk)`` where risk is "HIGH" (>= 70),
        "WARNING" (>= 40), "INFO" (> 0) or "OK" (0).
    """
    entry = results.get(file)
    raw_score = entry["score"] if entry else 0
    score = min(raw_score, 100)
    if context_mode.get() == "INTERNAL":
        score = int(score * 0.7)

    if score >= 70:
        risk = "HIGH"
    elif score >= 40:
        risk = "WARNING"
    elif score > 0:
        risk = "INFO"
    else:
        risk = "OK"
    return score, risk
💡 Logic
Caps the score at 100
Scales the score by 0.7 for internal use
Converts the score → a readable risk level
5. File Detection Rules
🧠 5.1 Filename Scanner
import os
def check_filename(file):
    """Flag *file* once (40 points) for every sensitive keyword in its base name.

    Matching is a case-insensitive substring test on the file name only;
    directory components are ignored.
    """
    base = os.path.basename(file).lower()
    sensitive = ("password", "secret", "salary", "api", "key", "private", "token")
    hits = [word for word in sensitive if word in base]
    for word in hits:
        add_issue(file, 40, f"Sensitive keyword: {word}")
💡 Detects:
Risky file names like password.txt
📸 5.2 Image Metadata Scanner
from PIL import Image, ExifTags
def check_image(file):
    """Report GPS and device-model EXIF tags found in the image *file*.

    Adds 70 points for each tag whose name contains "GPS" and 10 points for
    each tag whose name contains "Model". Files that cannot be opened or
    parsed are skipped silently (best-effort scan).
    """
    try:
        # The context manager releases the file handle as soon as EXIF is read.
        with Image.open(file) as img:
            # _getexif() is a private helper that not every image plugin is
            # guaranteed to provide; fall back to the public getexif().
            legacy_reader = getattr(img, "_getexif", None)
            if legacy_reader is not None:
                exif = legacy_reader()
            else:
                exif = dict(img.getexif())
    except Exception:
        # Unreadable or non-image file: nothing to report. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return

    if not exif:
        return

    for tag, val in exif.items():
        # Unknown numeric tags keep their id, so always compare as text.
        tag_name = str(ExifTags.TAGS.get(tag, tag))
        if "GPS" in tag_name:
            add_issue(file, 70, "GPS location found")
        if "Model" in tag_name:
            add_issue(file, 10, f"Device: {val}")
💡 Detects:
GPS location (high risk)
📱 Device model
5.3 PDF Metadata
from pypdf import PdfReader
def check_pdf(file):
    """Report the author recorded in the metadata of the PDF *file* (30 points).

    PDFs that cannot be opened or parsed are skipped silently (best-effort
    scan).
    """
    try:
        meta = PdfReader(file).metadata
        author = meta.author if meta else None
    except Exception:
        # Corrupt or encrypted file: skip it. ``except Exception`` (rather
        # than a bare ``except:``) keeps KeyboardInterrupt/SystemExit working.
        return
    if author:
        add_issue(file, 30, f"Author: {author}")
5.4 Excel Hidden Sheets
from openpyxl import load_workbook
def check_excel(file):
    """Report every worksheet in the workbook *file* that is not visible.

    Each sheet whose ``sheet_state`` differs from "visible" adds 30 points.
    Workbooks that cannot be opened or parsed are skipped silently
    (best-effort scan).
    """
    try:
        # Only sheet names and states are needed, so open read-only: cell
        # data is then loaded lazily instead of for the whole workbook.
        wb = load_workbook(file, read_only=True)
        try:
            for sheet in wb.worksheets:
                if sheet.sheet_state != "visible":
                    add_issue(file, 30, f"Hidden sheet: {sheet.title}")
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()
    except Exception:
        # Not a readable workbook: skip it. ``except Exception`` (rather than
        # a bare ``except:``) keeps KeyboardInterrupt/SystemExit working.
        return
📦 5.5 ZIP Scanner
import zipfile
def check_zip(file):
    """Report archive members of *file* whose names look sensitive.

    Every member whose lower-cased name contains "password", "secret" or
    "key" adds 60 points. Only names are inspected; members are never
    extracted. Missing, unreadable or corrupt archives are skipped silently
    (best-effort scan).
    """
    suspicious = ("password", "secret", "key")
    try:
        with zipfile.ZipFile(file) as archive:
            member_names = archive.namelist()
    except (zipfile.BadZipFile, OSError):
        # Not a valid archive or not readable: nothing to report.
        return

    for name in member_names:
        lowered = name.lower()
        if any(word in lowered for word in suspicious):
            add_issue(file, 60, f"ZIP contains: {name}")
⚡ 6. Multithreaded Scanner
We scan files in parallel for speed.
from concurrent.futures import ThreadPoolExecutor, as_completed
def scan():
    """Scan every queued file in a thread pool and stream the outcome to the UI.

    Clears the previous results, runs the filename check plus the
    type-specific check on each file, posts ``("progress", percent)`` as
    files finish, and finally posts one
    ``("result", (file, risk, score, issues))`` message per scanned file —
    including files with no findings, which are reported as "OK".
    """
    results.clear()
    ui_queue.put(("clear_results", None))
    # Drop duplicate paths (order preserved) so no file is scored twice and
    # no two workers ever update the same results entry.
    files = list(dict.fromkeys(file_list))
    total = len(files)

    def worker(file):
        check_filename(file)
        # Match extensions case-insensitively for every type, so "REPORT.PDF"
        # is inspected just like "report.pdf".
        lowered = file.lower()
        if lowered.endswith((".jpg", ".png", ".jpeg")):
            check_image(file)
        elif lowered.endswith(".pdf"):
            check_pdf(file)
        elif lowered.endswith(".xlsx"):
            check_excel(file)
        elif lowered.endswith(".zip"):
            check_zip(file)

    # Run threads
    done = 0
    with ThreadPoolExecutor(max_workers=6) as ex:
        futures = {ex.submit(worker, f): f for f in files}
        for fut in as_completed(futures):
            # Surface unexpected worker failures instead of dropping them.
            error = fut.exception()
            if error is not None:
                add_issue(futures[fut], 0, f"Scan failed: {error}")
            done += 1
            # total is never 0 here: this loop only runs when files exist.
            ui_queue.put(("progress", int(done / total * 100)))

    # Push results to UI
    for f in files:
        # Files without findings get an empty entry so they show up as "OK".
        entry = results.setdefault(f, {"score": 0, "issues": []})
        score, risk = finalize_score(f)
        issues = "; ".join(entry["issues"])
        ui_queue.put(("result", (f, risk, score, issues)))
7. Sanitize Images (Remove Metadata)
from PIL import Image
def strip_metadata_copy(src, dest_folder):
    """Write a metadata-free copy of the image *src* into *dest_folder*.

    Only the pixel data (plus the colour palette for palette images) is
    copied into a brand-new image, so EXIF and other embedded metadata are
    left behind. The copy keeps the source file name; if that would
    overwrite the source itself, "_clean" is appended to the name.

    Returns:
        The path of the written copy, or ``None`` if the image could not be
        read or saved.
    """
    try:
        # The context manager closes the source file once pixels are copied.
        with Image.open(src) as img:
            clean = Image.new(img.mode, img.size)
            clean.putdata(list(img.getdata()))
            if img.mode == "P":
                # Palette images store colour indices; without the palette
                # the copy would come out with the wrong colours.
                palette = img.getpalette()
                if palette:
                    clean.putpalette(palette)

        new_path = os.path.join(dest_folder, os.path.basename(src))
        if os.path.abspath(new_path) == os.path.abspath(src):
            # Never replace the original with its sanitized copy.
            stem, ext = os.path.splitext(new_path)
            new_path = f"{stem}_clean{ext}"
        clean.save(new_path)
        return new_path
    except Exception:
        # Best-effort: report failure through the return value.
        return None
💡 What it does
Removes EXIF metadata
Keeps only the raw pixels
8. File Input (UI Actions)
Add Files
from tkinter import filedialog
def add_files():
    """Ask the user to pick files and queue those not already in ``file_list``."""
    for chosen in filedialog.askopenfilenames():
        if chosen in file_list:
            continue
        file_list.append(chosen)
Add Folder
def add_folder():
    """Ask the user for a folder and queue every file beneath it.

    Files are collected recursively. Paths already present in ``file_list``
    are skipped, matching the behaviour of ``add_files``. Cancelling the
    dialog leaves the list untouched.
    """
    folder = filedialog.askdirectory()
    if not folder:
        # Dialog was cancelled.
        return

    already_queued = set(file_list)
    for root, _, names in os.walk(folder):
        for name in names:
            path = os.path.join(root, name)
            if path not in already_queued:
                already_queued.add(path)
                file_list.append(path)
Drag & Drop
def drop_files(event):
    """Queue the regular files contained in a drag-and-drop *event*.

    ``event.data`` is split with Tk's list parser, so paths containing
    spaces stay intact. Entries that are not existing files are ignored, and
    paths already present in ``file_list`` are skipped, matching the
    behaviour of ``add_files``.
    """
    for path in app.tk.splitlist(event.data):
        if os.path.isfile(path) and path not in file_list:
            file_list.append(path)
9. Export Results (CSV)
import csv
def export_csv():
    """Save the HIGH and WARNING rows of the results table to a CSV file.

    The user picks the destination through a save dialog; cancelling the
    dialog does nothing. The file is written as UTF-8 with a header row.
    """
    path = filedialog.asksaveasfilename(defaultextension=".csv")
    if not path:
        # Dialog was cancelled; opening an empty path would raise.
        return

    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["File", "Risk", "Score", "Issues"])
        for row in tree.get_children():
            data = tree.item(row)["values"]
            # Column 1 holds the risk level; lower levels are not exported.
            if data[1] in ("HIGH", "WARNING"):
                writer.writerow(data)
10. Thread-Safe UI Updates
from queue import Empty
def process_queue():
    """Apply all pending UI messages, then schedule the next poll in 80 ms.

    Handles "result" (insert a table row tagged with its risk level),
    "progress" (update the progress variable) and "clear_results" (empty the
    table); any other command is ignored.
    """
    while True:
        try:
            cmd, data = ui_queue.get_nowait()
        except Empty:
            # Queue drained for this tick.
            break
        if cmd == "progress":
            progress_var.set(data)
        elif cmd == "clear_results":
            tree.delete(*tree.get_children())
        elif cmd == "result":
            tree.insert("", "end", values=data, tags=(data[1],))
    app.after(80, process_queue)
🎯 Final Thoughts
You just built a real-world security tool with:
⚡ Multithreading
🧠 Risk scoring engine
File analysis system
Metadata sanitization
🖥️ Modern GUI
💡 Next Improvements
Want to level this up?
Add AI-based content scanning
📸 Add thumbnail preview for all file types
Add a risk dashboard (charts)
Add one-click “Sanitize All”
Top comments (0)