# Find text in images using OCR (Python)

"""Batch OCR over a folder of images, printing dot-containing tokens.

For every image in ``folder`` the script:

1. re-encodes the file with Pillow as ``clean_<i>.jpg``,
2. binarises, deskews and upscales it with OpenCV (saved as
   ``processed_<i>.jpg`` for debugging),
3. runs Tesseract on the result, and
4. prints every token of the form ``word.word`` (URLs, filenames, ...).
"""

import os
import re

import cv2
import numpy as np
import pytesseract
from PIL import Image

# ✅ Path to Tesseract (Windows)
# Only override pytesseract's default lookup when this install actually
# exists, so the script also runs where Tesseract is found another way.
TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

# ✅ Folder containing GoPro images
folder = "./images"

# ✅ Common image extensions; compared against the lower-cased file name,
# so ".JPG", ".Jpg", ".PNG", ... are all accepted.
valid_exts = (".jpg", ".jpeg", ".png")

# Files this script writes into ``folder`` itself. They must never be picked
# up as inputs, otherwise a second run re-OCRs (and may overwrite) its own
# intermediate images.
_GENERATED_NAME = re.compile(r"(?:clean|processed)_\d+\.jpg")

# Dot-containing words (like URLs, filenames, etc.); compiled once, used per file.
_DOTTED_WORD = re.compile(r'\b\w+\.\w{2,}\b')

_OCR_CONFIG = "--psm 6"
_UPSCALE = 2.0


def _list_input_images(directory):
    """Return the sorted image file names in *directory* to be processed.

    Names are matched case-insensitively against ``valid_exts``; files
    previously generated by this script are excluded. Sorting keeps the
    ``<i>`` numbering of output files stable between runs.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(valid_exts)
        and not _GENERATED_NAME.fullmatch(name)
    )


def _deskew_rotation(rect_angle):
    """Map a ``cv2.minAreaRect`` angle to the rotation (degrees) that undoes it.

    Older OpenCV releases report the angle in [-90, 0); newer ones report it
    in (0, 90]. Both describe the same rectangle up to a 90 degree turn, so
    the result is always folded into the range [-45, 45].
    """
    if rect_angle < -45:
        return -(90 + rect_angle)
    if rect_angle >= 45:
        # Newer-OpenCV equivalent of the branch above (angle shifted by +90).
        return 90 - rect_angle
    return -rect_angle


def _preprocess(bgr):
    """Return a binarised, deskewed, 2x-upscaled version of a BGR image."""
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    binary = cv2.medianBlur(binary, 3)

    # Deskew (if text is tilted).
    # NOTE(review): the rectangle is fitted to the *white* pixels. For dark
    # text on a light background that is mostly the page, not the glyphs, so
    # the estimated tilt may be ~0 -- confirm against real inputs.
    coords = np.column_stack(np.where(binary > 0))
    if len(coords) > 0:
        angle = _deskew_rotation(cv2.minAreaRect(coords)[-1])
        h, w = binary.shape[:2]
        matrix = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        binary = cv2.warpAffine(
            binary,
            matrix,
            (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    # Resize to improve OCR.
    return cv2.resize(
        binary, None, fx=_UPSCALE, fy=_UPSCALE, interpolation=cv2.INTER_CUBIC
    )


def main():
    """Run the OCR pipeline over every image in ``folder`` and print matches."""
    files = _list_input_images(folder)

    for i, file in enumerate(files, start=1):
        path = os.path.join(folder, file)
        print(f"\n📸 Processing file {i}/{len(files)}: {file}")

        # --- STEP 1: Clean metadata using Pillow ---
        clean_path = os.path.join(folder, f"clean_{i}.jpg")
        try:
            with Image.open(path) as source:
                # Re-encode as plain RGB JPEG to drop nonstandard metadata.
                source.convert("RGB").save(clean_path, "JPEG", quality=100)
        except Exception as e:
            # Per-file boundary: report the failure and move on to the next image.
            print(f"❌ Could not open {file}: {e}")
            continue

        # --- STEP 2: Load with OpenCV ---
        img = cv2.imread(clean_path)
        if img is None:
            print(f"⚠️ Skipping {file} (could not read with OpenCV)")
            continue

        # --- STEPS 3-5: Threshold, deskew, upscale ---
        resized = _preprocess(img)

        # --- STEP 6: Save preprocessed image (optional for debugging) ---
        processed_path = os.path.join(folder, f"processed_{i}.jpg")
        cv2.imwrite(processed_path, resized)

        # --- STEP 7: OCR with tuned config ---
        text = pytesseract.image_to_string(resized, config=_OCR_CONFIG, lang="eng")

        # --- STEP 8: Extract dot-containing words ---
        matches = _DOTTED_WORD.findall(text)
        if matches:
            print("✅ Matches found:")
            for match in matches:
                print(" ", match)
        else:
            print("⚠️ No matches found.")

    print("\n✅ Processing complete.")


if __name__ == "__main__":
    main()