import { logger } from '../../utils/logger';
import { pdfLib } from './pdf-config';

/**
 * A chunk of text extracted from one page of a PDF document.
 */
interface ParsedSection {
  /** Heading-like text detected for this section; absent when none was found. */
  title?: string;
  /** Text of the section, built up by concatenating the page's text items. */
  content: string;
  /** Number of the page this section was read from (page numbering starts at 1). */
  pageNumber: number;
}

/** Text items whose reported height exceeds this value are heading candidates. */
const HEADING_MIN_HEIGHT = 12;
/** A vertical jump larger than this between consecutive text items marks a heading candidate. */
const HEADING_MIN_VERTICAL_GAP = 20;
/** Heading candidates must be shorter than this many characters; longer text is body content. */
const HEADING_MAX_LENGTH = 100;

/** The subset of a text-content entry that the parser reads. */
type PdfTextEntry = { str?: string; height?: number; transform?: number[] };

/**
 * Folds the text entries of one page into a single section: the first short,
 * heading-like entry becomes the title and every other entry is appended to the content.
 */
function buildPageSection(entries: readonly object[], pageNumber: number): ParsedSection {
  const section: ParsedSection = { content: '', pageNumber };
  // Baseline (transform[5]) of the previous non-empty entry; null until one is seen.
  let previousY: number | null = null;

  for (const entry of entries) {
    const { str, height, transform } = entry as PdfTextEntry;

    // Entries without a string payload (e.g. marked-content markers) carry no text.
    if (typeof str !== 'string') continue;
    const text = str.trim();
    if (!text) continue;

    const y = Array.isArray(transform) ? transform[5] : undefined;
    const isTall = typeof height === 'number' && height > HEADING_MIN_HEIGHT;
    // Compare against null explicitly: a baseline of 0 is a valid position, not "no previous item".
    const hasVerticalGap =
      previousY !== null &&
      typeof y === 'number' &&
      Math.abs(y - previousY) > HEADING_MIN_VERTICAL_GAP;

    if ((isTall || hasVerticalGap) && !section.title && text.length < HEADING_MAX_LENGTH) {
      section.title = text;
    } else {
      section.content += text + ' ';
    }

    if (typeof y === 'number') {
      previousY = y;
    }
  }

  return section;
}

/**
 * Extracts the text of a PDF document as one section per page.
 *
 * Pages are read in order. For each page the first short, heading-like text item
 * (taller than the height threshold, or separated from the previous item by a large
 * vertical gap) is used as the section title; all remaining text is concatenated into
 * the section content. Pages that yield no content text are omitted from the result.
 *
 * @param arrayBuffer - Raw bytes of the PDF document.
 * @returns The parsed sections, in page order.
 * @throws Error with the message `Failed to parse PDF content` when the document cannot
 *   be loaded or read; the underlying error is logged before the wrapper is thrown.
 */
export async function parsePDFContent(arrayBuffer: ArrayBuffer): Promise<ParsedSection[]> {
  try {
    const loadingTask = pdfLib.getDocument(arrayBuffer);

    try {
      const pdf = await loadingTask.promise;
      const sections: ParsedSection[] = [];

      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const textContent = await page.getTextContent();
        const section = buildPageSection(textContent.items, pageNumber);

        if (section.content.trim()) {
          sections.push(section);
        }
      }

      logger.debug('PDF parsed into sections:', {
        sectionCount: sections.length,
        totalPages: pdf.numPages
      });

      return sections;
    } finally {
      // Release the document's resources whether parsing succeeded or failed.
      // A cleanup failure must not mask the parse result, so it is only logged.
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        logger.debug('Failed to release PDF resources:', cleanupError);
      }
    }
  } catch (error) {
    logger.error('Error parsing PDF content:', error);
    throw new Error('Failed to parse PDF content');
  }
}