Files
html2pdf/converter/converter.go
2026-01-07 17:04:26 +01:00

364 lines
9.1 KiB
Go

package converter
import (
	"archive/zip"
	"bytes"
	"context"
	"errors"
	"fmt"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	"gitea.tecamino.com/paadi/html2pdf/models"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"
)
// Converter renders HTML to PDF through a Chrome instance driven by chromedp.
// NewConverter creates the allocator and browser contexts stored here; every
// conversion derives its own chromedp context from browserCtx. Call Close to
// cancel both contexts when the converter is no longer needed.
type Converter struct {
chromePath string // directory searched first for the chrome-headless-shell executable
allocCtx context.Context // context returned by chromedp.NewExecAllocator
allocCancel context.CancelFunc // cancels allocCtx
browserCtx context.Context // context returned by chromedp.NewContext(allocCtx)
browserCancel context.CancelFunc // cancels browserCtx
progress func(progress int) // optional callback; called with the 1-based index of each item
}
// NewConverter resolves a Chrome executable and starts it headless.
//
// chromePath is the directory that getChromePath searches first for the
// chrome-headless-shell binary before it falls back to known system install
// locations. The returned Converter holds the allocator and browser contexts;
// call Close when finished with it.
//
// An error is returned when no executable can be found or when the browser
// fails to start.
func NewConverter(chromePath string) (*Converter, error) {
	conv := &Converter{chromePath: chromePath}

	execPath, err := conv.getChromePath()
	if err != nil {
		return nil, err
	}

	allocOpts := append(
		chromedp.DefaultExecAllocatorOptions[:],
		chromedp.ExecPath(execPath),
		chromedp.NoSandbox,
		chromedp.Headless,
		chromedp.DisableGPU,
	)
	allocOpts = append(allocOpts, platformOptions())

	conv.allocCtx, conv.allocCancel = chromedp.NewExecAllocator(context.Background(), allocOpts...)
	conv.browserCtx, conv.browserCancel = chromedp.NewContext(conv.allocCtx)

	// Run with no actions as a warm-up so that a missing or non-executable
	// binary is reported here instead of on the first conversion.
	if startErr := chromedp.Run(conv.browserCtx); startErr != nil {
		conv.Close() // release whatever was created before the failure
		return nil, fmt.Errorf("failed to start chrome: %w", startErr)
	}
	return conv, nil
}
// SetProgressCallback stores cb as the progress callback. The conversion
// methods call it, when it is non-nil, once per item with that item's 1-based
// index before the item is processed. Passing nil disables progress reporting.
func (c *Converter) SetProgressCallback(cb func(progress int)) {
c.progress = cb
}
// ConvertToPdf renders each HTML file in files to a PDF file on disk.
//
// Every entry must have an Input path ending in ".html" and an Output path
// ending in ".pdf"; the extension check is case-sensitive. Entries are
// processed in order and, when a progress callback is set, it receives the
// 1-based index of the entry before that entry is validated and rendered.
//
// Each file is opened through a file:// URL in its own chromedp context with
// a 60-second timeout, printed with backgrounds on 8.27 x 11.69 inch paper
// (A4), and written to Output with mode 0644.
//
// Processing stops at the first failure and that error is returned; PDFs
// written before the failure are left in place.
func (c *Converter) ConvertToPdf(files ...models.File) error {
	for i, f := range files {
		if c.progress != nil {
			c.progress(i + 1)
		}

		if f.Input == "" || filepath.Ext(f.Input) != ".html" {
			return fmt.Errorf("no .html input file path provided: %s", f.Input)
		}
		if f.Output == "" || filepath.Ext(f.Output) != ".pdf" {
			return fmt.Errorf("no .pdf output file path provided: %s", f.Output)
		}

		absPath, err := filepath.Abs(f.Input)
		if err != nil {
			return fmt.Errorf("resolving %q: %w", f.Input, err)
		}

		// Build the URL with net/url so that '#', '?', '%' and spaces in the
		// path are percent-encoded instead of being interpreted as a
		// fragment, a query or an escape sequence.
		urlPath := filepath.ToSlash(absPath)
		if !strings.HasPrefix(urlPath, "/") {
			// Windows drive paths ("C:/...") need a leading slash to form
			// file:///C:/...
			urlPath = "/" + urlPath
		}
		htmlURL := (&url.URL{Scheme: "file", Path: urlPath}).String()

		taskCtx, taskCancel := chromedp.NewContext(c.browserCtx)
		timeoutCtx, timeoutCancel := context.WithTimeout(taskCtx, 60*time.Second)

		var pdfData []byte
		err = chromedp.Run(timeoutCtx,
			chromedp.Navigate(htmlURL),
			chromedp.WaitReady("body", chromedp.ByQuery),
			chromedp.ActionFunc(func(ctx context.Context) error {
				buf, _, err := page.PrintToPDF().
					WithPrintBackground(true).
					WithPaperWidth(8.27).
					WithPaperHeight(11.69).
					Do(ctx)
				if err != nil {
					return err
				}
				pdfData = buf
				return nil
			}),
		)
		// Cancel explicitly rather than with defer: a defer inside the loop
		// would keep every context open until the whole batch is done.
		timeoutCancel()
		taskCancel()
		if err != nil {
			return fmt.Errorf("rendering %q: %w", f.Input, err)
		}

		// os.WriteFile errors already carry the path, so no extra wrapping.
		if err := os.WriteFile(f.Output, pdfData, 0644); err != nil {
			return err
		}
	}
	return nil
}
// ConvertHtmlsToPdf renders each in-memory HTML document to a PDF file.
//
// Every entry needs a non-empty Html payload and an Output path ending in
// ".pdf". Entries are handled in order; when a progress callback is set it
// receives the 1-based index of the entry before the entry is validated.
//
// Each document is injected into a blank page in its own chromedp context
// with a 60-second timeout, printed with backgrounds on 8.27 x 11.69 inch
// paper (A4), and written to Output with mode 0644. The first error aborts
// the batch; files written earlier are left in place.
func (c *Converter) ConvertHtmlsToPdf(htmls ...models.Html) error {
	for idx, doc := range htmls {
		if c.progress != nil {
			c.progress(idx + 1)
		}

		switch {
		case len(doc.Html) == 0:
			return fmt.Errorf("no .html input provided")
		case doc.Output == "" || filepath.Ext(doc.Output) != ".pdf":
			return fmt.Errorf("no .pdf output file path provided: %s", doc.Output)
		}

		var rendered []byte

		// Replace the blank page's content with the supplied HTML.
		loadDocument := chromedp.ActionFunc(func(ctx context.Context) error {
			tree, err := page.GetFrameTree().Do(ctx)
			if err != nil {
				return err
			}
			return page.SetDocumentContent(tree.Frame.ID, string(doc.Html)).Do(ctx)
		})

		// Print the loaded page and keep the resulting bytes.
		printDocument := chromedp.ActionFunc(func(ctx context.Context) error {
			data, _, err := page.PrintToPDF().
				WithPrintBackground(true).
				WithPaperWidth(8.27).
				WithPaperHeight(11.69).
				Do(ctx)
			if err != nil {
				return err
			}
			rendered = data
			return nil
		})

		tabCtx, closeTab := chromedp.NewContext(c.browserCtx)
		deadlineCtx, stopDeadline := context.WithTimeout(tabCtx, 60*time.Second)
		runErr := chromedp.Run(deadlineCtx,
			chromedp.Navigate("about:blank"),
			loadDocument,
			chromedp.WaitReady("body", chromedp.ByQuery),
			printDocument,
		)
		stopDeadline()
		closeTab()
		if runErr != nil {
			return runErr
		}

		if writeErr := os.WriteFile(doc.Output, rendered, 0644); writeErr != nil {
			return writeErr
		}
	}
	return nil
}
// ConvertHtmlsToBytes renders each in-memory HTML document to PDF and returns
// the PDFs as byte slices in the same order as htmls.
//
// Nothing is written to disk, yet every entry must still carry an Output
// path ending in ".pdf" in addition to a non-empty Html payload. When a
// progress callback is set it receives the 1-based index of the entry before
// the entry is validated.
//
// Each document is injected into a blank page in its own chromedp context
// with a 60-second timeout and printed with backgrounds on 8.27 x 11.69 inch
// paper (A4). On the first error the function returns nil together with that
// error, discarding anything rendered so far.
func (c *Converter) ConvertHtmlsToBytes(htmls ...models.Html) ([][]byte, error) {
	var pdfs [][]byte

	for idx, doc := range htmls {
		if c.progress != nil {
			c.progress(idx + 1)
		}

		if len(doc.Html) == 0 {
			return nil, fmt.Errorf("no .html input provided")
		}
		if doc.Output == "" || filepath.Ext(doc.Output) != ".pdf" {
			return nil, fmt.Errorf("no .pdf output file path provided: %s", doc.Output)
		}

		var rendered []byte
		actions := []chromedp.Action{
			// Begin from an empty page.
			chromedp.Navigate("about:blank"),
			// Replace its content with the supplied HTML.
			chromedp.ActionFunc(func(ctx context.Context) error {
				tree, err := page.GetFrameTree().Do(ctx)
				if err != nil {
					return err
				}
				return page.SetDocumentContent(tree.Frame.ID, string(doc.Html)).Do(ctx)
			}),
			chromedp.WaitReady("body", chromedp.ByQuery),
			// Print the page and keep the bytes.
			chromedp.ActionFunc(func(ctx context.Context) error {
				data, _, err := page.PrintToPDF().
					WithPrintBackground(true).
					WithPaperWidth(8.27).
					WithPaperHeight(11.69).
					Do(ctx)
				if err != nil {
					return err
				}
				rendered = data
				return nil
			}),
		}

		tabCtx, closeTab := chromedp.NewContext(c.browserCtx)
		deadlineCtx, stopDeadline := context.WithTimeout(tabCtx, 60*time.Second)
		runErr := chromedp.Run(deadlineCtx, actions...)
		stopDeadline()
		closeTab()
		if runErr != nil {
			return nil, runErr
		}

		pdfs = append(pdfs, rendered)
	}

	return pdfs, nil
}
// ConvertHtmlsToZip renders each in-memory HTML document to PDF and returns a
// zip archive, as bytes, holding one entry per document.
//
// Every entry needs a non-empty Html payload and an Output ending in ".pdf";
// Output is used as the entry name inside the archive. When a progress
// callback is set it receives the 1-based index of the entry before the
// entry is validated.
//
// Each document is injected into a blank page in its own chromedp context
// with a 60-second timeout and printed with backgrounds on 8.27 x 11.69 inch
// paper (A4). On any error, including a failure to write an entry or to
// finalize the archive, the function returns nil together with that error.
func (c *Converter) ConvertHtmlsToZip(htmls ...models.Html) ([]byte, error) {
	zipBuf := new(bytes.Buffer)
	zipWriter := zip.NewWriter(zipBuf)

	for i, h := range htmls {
		if c.progress != nil {
			c.progress(i + 1)
		}

		if len(h.Html) == 0 {
			return nil, fmt.Errorf("no .html input provided")
		}
		if h.Output == "" || filepath.Ext(h.Output) != ".pdf" {
			return nil, fmt.Errorf("no .pdf output file path provided: %s", h.Output)
		}

		taskCtx, taskCancel := chromedp.NewContext(c.browserCtx)
		timeoutCtx, timeoutCancel := context.WithTimeout(taskCtx, 60*time.Second)

		var pdfData []byte
		err := chromedp.Run(timeoutCtx,
			// Start with a blank page.
			chromedp.Navigate("about:blank"),
			// Inject the HTML directly into the page's main frame.
			chromedp.ActionFunc(func(ctx context.Context) error {
				frameTree, err := page.GetFrameTree().Do(ctx)
				if err != nil {
					return err
				}
				return page.SetDocumentContent(frameTree.Frame.ID, string(h.Html)).Do(ctx)
			}),
			chromedp.WaitReady("body", chromedp.ByQuery),
			chromedp.ActionFunc(func(ctx context.Context) error {
				buf, _, err := page.PrintToPDF().
					WithPrintBackground(true).
					WithPaperWidth(8.27).
					WithPaperHeight(11.69).
					Do(ctx)
				if err != nil {
					return err
				}
				pdfData = buf
				return nil
			}),
		)
		// Cancel explicitly rather than with defer: a defer inside the loop
		// would keep every context open until the whole batch is done.
		timeoutCancel()
		taskCancel()
		if err != nil {
			return nil, err
		}

		entry, err := zipWriter.Create(h.Output)
		if err != nil {
			return nil, err
		}
		// A failed write would leave a truncated entry, so it must not be
		// ignored.
		if _, err := entry.Write(pdfData); err != nil {
			return nil, fmt.Errorf("writing zip entry %q: %w", h.Output, err)
		}
	}

	// Close writes the central directory; the archive is unusable unless
	// this succeeds.
	if err := zipWriter.Close(); err != nil {
		return nil, fmt.Errorf("finalizing zip archive: %w", err)
	}
	return zipBuf.Bytes(), nil
}
// Close cancels the browser context and then the allocator context.
// Cancel functions that are nil are skipped, so Close may be called on a
// Converter whose setup did not complete.
func (c *Converter) Close() {
	stop := func(cancel func()) {
		if cancel != nil {
			cancel()
		}
	}
	// Browser first, allocator second.
	stop(c.browserCancel)
	stop(c.allocCancel)
}
// getChromePath returns the path of the Chrome executable to launch.
//
// It first looks for "chrome-headless-shell" (with ".exe" appended on
// Windows) inside c.chromePath. If that does not exist, it tries a fixed list
// of system Chrome/Chromium install locations for the current OS, in order,
// and returns the first one that exists. An error is returned when none of
// the locations exist.
func (c *Converter) getChromePath() (string, error) {
	shellName := "chrome-headless-shell"
	if runtime.GOOS == "windows" {
		shellName += ".exe"
	}

	// System-wide installs, used only when the bundled shell is missing.
	var systemInstalls []string
	switch runtime.GOOS {
	case "windows":
		systemInstalls = []string{
			`C:\Program Files\Google\Chrome\Application\chrome.exe`,
			`C:\Program Files (x86)\Google\Chrome\Application\chrome.exe`,
			`C:\Program Files\Chromium\Application\chrome.exe`,
		}
	case "darwin":
		systemInstalls = []string{
			"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
			"/Applications/Chromium.app/Contents/MacOS/Chromium",
		}
	default: // Linux and everything else
		systemInstalls = []string{
			"/usr/bin/google-chrome",
			"/usr/bin/chromium-browser",
			"/usr/bin/chromium",
		}
	}

	// The bundled shell takes precedence over any system install.
	lookupOrder := append([]string{filepath.Join(c.chromePath, shellName)}, systemInstalls...)
	for _, candidate := range lookupOrder {
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate, nil
		}
	}
	return "", errors.New("chrome path not found")
}