package pdf import ( "fmt" "os" "regexp" "strconv" ) // CountPDFPages counts the number of pages in a PDF file. // It reads the PDF and extracts the page count from its internal structure. func CountPDFPages(filePath string) (int, error) { // Check if file exists if _, err := os.Stat(filePath); err != nil { return 0, fmt.Errorf("file not found: %w", err) } // Read the PDF file data, err := os.ReadFile(filePath) if err != nil { return 0, fmt.Errorf("failed to read file: %w", err) } // Convert to string for regex matching content := string(data) // Try to find /Type /Pages /Count pattern // This is the most reliable indicator of page count in PDF structure pageCountRegex := regexp.MustCompile(`/Type\s+/Pages\s+/Kids\s*\[(.*?)\]\s+/Count\s+(\d+)`) matches := pageCountRegex.FindStringSubmatch(content) if len(matches) >= 3 { count, err := strconv.Atoi(matches[2]) if err == nil && count > 0 { return count, nil } } // Fallback: count individual /Type /Page objects pageRegex := regexp.MustCompile(`/Type\s+/Page[\s/\(\)]`) pageMatches := pageRegex.FindAllString(content, -1) if len(pageMatches) > 0 { return len(pageMatches), nil } // If we can't determine page count, return 0 (indicates error or unknown) return 0, fmt.Errorf("unable to determine page count") }