48 lines
1.3 KiB
Go
48 lines
1.3 KiB
Go
|
|
package pdf
|
||
|
|
|
||
|
|
import (
|
||
|
|
"fmt"
|
||
|
|
"os"
|
||
|
|
"regexp"
|
||
|
|
"strconv"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Patterns used by CountPDFPages. They are compiled once at package scope so
// repeated calls do not pay the compilation cost.
var (
	// pdfPagesCountRe matches a page-tree node dictionary (/Type /Pages) and
	// captures its /Count value. The two alternatives cover both key orders:
	// group 1 is set when /Type precedes /Count, group 2 when /Count comes
	// first. Whitespace between /Type and /Pages is optional because compact
	// writers emit "/Type/Pages". The [^<>] gap keeps a match inside a single
	// dictionary: it cannot cross a "<<" or ">>" delimiter.
	pdfPagesCountRe = regexp.MustCompile(`/Type\s*/Pages\b[^<>]*?/Count\s+(\d+)|/Count\s+(\d+)[^<>]*?/Type\s*/Pages\b`)

	// pdfPageObjectRe matches a leaf page dictionary marker (/Type /Page).
	// The trailing \b rejects /Pages and other longer names starting with Page.
	pdfPageObjectRe = regexp.MustCompile(`/Type\s*/Page\b`)
)

// CountPDFPages counts the number of pages in the PDF file at filePath.
//
// It is a heuristic scanner, not a PDF parser: it searches the raw file bytes
// for page-tree dictionaries and never decodes streams. The count is
// determined as follows:
//
//  1. Every /Type /Pages dictionary with a readable /Count is inspected and
//     the largest /Count is returned. The root of the page tree holds the
//     total number of leaf pages, so it is always at least as large as any
//     intermediate node.
//  2. If no usable /Count is found, the number of /Type /Page dictionaries is
//     returned instead.
//
// Limitations: page trees stored inside compressed object streams are not
// visible to this scan, and a /Pages dictionary that contains a nested
// dictionary between /Type and /Count is only handled by the fallback. Files
// with incremental updates may report the count of an older revision.
//
// It returns an error wrapping the underlying os error when the file is
// missing or unreadable, and a plain error when no page information can be
// found. The returned count is 0 whenever the error is non-nil.
func CountPDFPages(filePath string) (int, error) {
	// Reading directly (without a separate os.Stat) avoids a redundant
	// syscall and the window in which the file could change between checks.
	data, err := os.ReadFile(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return 0, fmt.Errorf("file not found: %w", err)
		}
		return 0, fmt.Errorf("failed to read file: %w", err)
	}

	// Preferred source: the /Count entry of the page-tree nodes.
	total := 0
	for _, m := range pdfPagesCountRe.FindAllSubmatch(data, -1) {
		digits := m[1]
		if len(digits) == 0 {
			digits = m[2]
		}
		// Atoi fails on absurdly long digit runs; such matches are skipped.
		if n, convErr := strconv.Atoi(string(digits)); convErr == nil && n > total {
			total = n
		}
	}
	if total > 0 {
		return total, nil
	}

	// Fallback: count individual leaf page dictionaries.
	if n := len(pdfPageObjectRe.FindAllIndex(data, -1)); n > 0 {
		return n, nil
	}

	return 0, fmt.Errorf("unable to determine page count")
}
|