#!/usr/bin/env python3
import pytesseract
from PIL import Image
import os
# Directory whose images are processed: the current working directory.
directory = os.getcwd()
# pytesseract only needs an explicit executable path when Tesseract is not
# on PATH. Apply the Homebrew-style path below only when that file actually
# exists; otherwise leave pytesseract's existing setting untouched so a
# `tesseract` found on PATH keeps working on other machines.
_HOMEBREW_TESSERACT = r"/opt/homebrew/bin/tesseract"
if os.path.isfile(_HOMEBREW_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = _HOMEBREW_TESSERACT
# Function to extract text and save as .md file
def extract_text_and_save(image_path):
    """Run OCR on one image and write the text to a sibling ``.md`` file.

    The output path is ``image_path`` with its extension replaced by
    ``.md``; an existing file at that path is overwritten.

    Args:
        image_path: Path of the image file to read.

    Returns:
        None. The recognised text is written to disk.
    """
    # Open the image in a context manager so its file handle is released as
    # soon as OCR is done rather than whenever the object gets collected.
    with Image.open(image_path) as img:
        text = pytesseract.image_to_string(img)

    # Same path as the image, with the extension swapped for ".md".
    md_filename = os.path.splitext(image_path)[0] + ".md"

    # OCR output may contain non-ASCII characters; write UTF-8 explicitly so
    # the result does not depend on the platform's default encoding.
    with open(md_filename, "w", encoding="utf-8") as f:
        f.write(text)
# Run OCR on every .jpg file (case-insensitive) found directly in `directory`.
# Sorted so files are processed in a predictable order; os.listdir() makes no
# ordering guarantee.
for filename in sorted(os.listdir(directory)):
    # A real one-element tuple: str.endswith accepts a tuple of suffixes, so
    # further extensions can be added here.
    if not filename.lower().endswith((".jpg",)):
        continue
    image_path = os.path.join(directory, filename)
    # os.listdir() also returns directories; skip anything that is not a
    # regular file so a directory named "*.jpg" does not abort the run.
    if not os.path.isfile(image_path):
        continue
    extract_text_and_save(image_path)