import os
import camelot
import arabic_reshaper
from bidi.algorithm import get_display
import pandas as pd

def _to_display_text(value):
    """Return ``value`` as a string prepared for right-to-left display.

    The value is converted with ``str``, passed through
    ``arabic_reshaper.reshape`` and then through ``get_display``, the same
    two steps that are applied to every header and cell of a table.
    """
    return get_display(arabic_reshaper.reshape(str(value)))


def test_pdf_processing(pdf_path=None):
    """Extract every table from a PDF with Camelot and print it.

    Each table is printed twice: once as the raw DataFrame and once as
    plain lists whose header and cell text has been run through
    ``_to_display_text``.

    Args:
        pdf_path: Path of the PDF to process. When omitted (``None``), the
            sample proposal document at its original absolute Windows
            location is used, so existing zero-argument callers keep
            working.

    Returns:
        ``True`` when the file exists and extraction finished without an
        exception (even if no tables were found); ``False`` when the file
        is missing or any exception was raised while processing.
    """
    if pdf_path is None:
        # Default sample: absolute path from project root with Windows path separators
        pdf_path = r"e:\projects\metaboard_backend\src\test\raw_file\پروپوزال ارائه سیم کارت به نمایندگان - 14030813.pdf"

    # This function is the script's top-level boundary, so any failure is
    # reported on stdout and turned into a False result instead of raising.
    try:
        if not os.path.exists(pdf_path):
            print(f"File not found: {pdf_path}")
            return False

        print(f"Testing PDF processing for: {pdf_path}")
        print("-" * 50)

        # Extract tables from PDF using Camelot
        print("Attempting to extract tables...")
        tables = camelot.read_pdf(pdf_path, pages='all')

        if not tables:
            print("No tables found in the PDF")
            return True

        print(f"\nFound {len(tables)} tables")
        for idx, table in enumerate(tables):
            frame = table.df
            print(f"\nTable {idx + 1}:")
            print(frame)  # raw DataFrame view of the table

            # NOTE(review): these are the DataFrame's column labels; if
            # Camelot assigns positional labels, the real header text is
            # probably the first data row instead -- confirm.
            headers = [_to_display_text(label) for label in frame.columns.tolist()]
            data = [
                [_to_display_text(cell) for cell in row]
                for row in frame.values.tolist()
            ]

            print("\nProcessed Table:")
            print("Headers:", headers)
            print("Data:", data)

        return True
    except Exception as e:
        print(f"Error: {str(e)}")
        return False

if __name__ == "__main__":
    # Script entry point: run the check once and report the overall outcome.
    outcome = "succeeded" if test_pdf_processing() else "failed"
    print(f"\nTest {outcome}")