From 055bc70935c9fb6b13dca082583d95d71c14e5c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20Z=C3=BCrcher?=
Date: Mon, 15 Dec 2025 12:24:54 +0100
Subject: [PATCH] first commit

---
 .gitignore                  |   2 +
 go.mod                      |   5 ++
 go.sum                      |   2 +
 internal/pdfmerge/consts.go |  20 +++++
 internal/pdfmerge/dir.go    |  45 ++++++++++
 internal/pdfmerge/img.go    | 156 ++++++++++++++++++++++++++++++++++++
 internal/pdfmerge/pdf.go    |  85 ++++++++++++++++++++
 internal/pdfmerge/source.go | 138 ++++++++++++++++++++++++++++++++
 internal/pdfmerge/util.go   |  35 ++++++++
 pdfmerge.go                 |  27 ++++++
 pdfmerge_test.go            |  25 ++++++
 11 files changed, 540 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 internal/pdfmerge/consts.go
 create mode 100644 internal/pdfmerge/dir.go
 create mode 100644 internal/pdfmerge/img.go
 create mode 100644 internal/pdfmerge/pdf.go
 create mode 100644 internal/pdfmerge/source.go
 create mode 100644 internal/pdfmerge/util.go
 create mode 100644 pdfmerge.go
 create mode 100644 pdfmerge_test.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+dst
+*.pdf
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..7b57da3
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module pdfmerge
+
+go 1.25.4
+
+require golang.org/x/image v0.34.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..4b59bdb
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/image v0.34.0 h1:33gCkyw9hmwbZJeZkct8XyR11yH889EQt/QH4VmXMn8=
+golang.org/x/image v0.34.0/go.mod h1:2RNFBZRB+vnwwFil8GkMdRvrJOFd1AzdZI6vOY+eJVU=
diff --git a/internal/pdfmerge/consts.go b/internal/pdfmerge/consts.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/consts.go
@@ -0,0 +1,20 @@
+package pdfmerge
+
+// Page-layout options. size and margin start at their defaults so that images
+// can be merged without any prior configuration.
+var (
+	size   = DefaultSize
+	margin = DefaultMargin
+
+	scaleH, scaleW, landscape bool
+)
+
+// JPEGQuality is the quality handed to the DCT encoder for JPEG images.
+var JPEGQuality = DefaultJPEGQuality
+
+const (
+	DefaultSize        = "IMG-SIZE"
+	DefaultMargin      = "0,0,0,0"
+	DefaultJPEGQuality = 75 // same value as image/jpeg.DefaultQuality
+	VERSION            = "1.2.0"
+)
diff --git a/internal/pdfmerge/dir.go b/internal/pdfmerge/dir.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/dir.go
@@ -0,0 +1,45 @@
+package pdfmerge
+
+import (
+	"os"
+	"path/filepath"
+
+	"pdfmerge/internal/pdf/creator"
+)
+
+// DirSource merges the files found directly inside a directory, in the
+// filename order produced by os.ReadDir.
+type DirSource struct {
+	path  string
+	files []Mergeable
+}
+
+// MergeTo merges every scanned file into c, stopping at the first error.
+func (s DirSource) MergeTo(c *creator.Creator) error {
+	for _, f := range s.files {
+		if err := f.MergeTo(c); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// scanMergeables fills s.files with one Mergeable per non-directory entry of
+// s.path. Subdirectories are skipped rather than recursed into.
+func (s *DirSource) scanMergeables() error {
+	entries, err := os.ReadDir(s.path)
+	if err != nil {
+		return err
+	}
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		m, err := getMergeableFile(filepath.Join(s.path, entry.Name()), nil)
+		if err != nil {
+			return err
+		}
+		s.files = append(s.files, m)
+	}
+	return nil
+}
diff --git a/internal/pdfmerge/img.go b/internal/pdfmerge/img.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/img.go
@@ -0,0 +1,156 @@
+package pdfmerge
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"golang.org/x/image/tiff"
+
+	"pdfmerge/internal/pdf/core"
+	"pdfmerge/internal/pdf/creator"
+)
+
+var (
+	pageMargin   [4]float64
+	pageSize     creator.PageSize
+	sizeHasSet   bool
+	marginHasSet bool
+
+	// tiffExts lists the lower-case extensions decoded with x/image/tiff.
+	tiffExts = []string{".tiff", ".tif"}
+)
+
+// ImgSource is a single image file that becomes one page of the output.
+type ImgSource struct {
+	source
+}
+
+// MergeTo appends the image to c as a new page.
+func (s ImgSource) MergeTo(c *creator.Creator) error {
+	return addImage(s.source, c)
+}
+
+// addImage loads the image described by s, applies the encoding, margin and
+// size settings, and draws it on a fresh page of c.
+func addImage(s source, c *creator.Creator) error {
+	img, err := createImage(s)
+	if err != nil {
+		return err
+	}
+
+	// Order matters: setEncoding must see the unscaled image, and setSize
+	// reads the margins that setMargin parses.
+	setEncoding(img, s)
+	if err := setMargin(img, c); err != nil {
+		return err
+	}
+	if err := setSize(img, c); err != nil {
+		return err
+	}
+
+	c.NewPage()
+	return c.Draw(img)
+}
+
+// createImage builds a creator.Image from s.path. TIFF files are decoded here
+// first; every other format is handed to creator.NewImageFromFile.
+func createImage(s source) (*creator.Image, error) {
+	if !in_array(strings.ToLower(s.ext), tiffExts) {
+		return creator.NewImageFromFile(s.path)
+	}
+
+	f, err := os.Open(s.path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	decoded, err := tiff.Decode(f)
+	if err != nil {
+		return nil, err
+	}
+	return creator.NewImageFromGoImage(decoded)
+}
+
+// setMargin parses the margin option once (exactly four comma separated
+// numbers, each multiplied by creator.PPI), then applies the cached margins
+// to c and positions img.
+func setMargin(img *creator.Image, c *creator.Creator) error {
+	if !marginHasSet {
+		parts := strings.Split(margin, ",")
+		if len(parts) != len(pageMargin) {
+			return fmt.Errorf("error: -m|--margin MUST be 4 comma separated int/float numbers. %s provided", margin)
+		}
+		// Parse into a scratch array so a bad value cannot leave pageMargin
+		// half-updated.
+		var parsed [4]float64
+		for i, m := range parts {
+			floatVal, err := strconv.ParseFloat(strings.TrimSpace(m), 64)
+			if err != nil {
+				return fmt.Errorf("error: -m|--margin MUST be 4 comma separated int/float numbers. %s found", m)
+			}
+			parsed[i] = floatVal * creator.PPI
+		}
+		pageMargin = parsed
+		marginHasSet = true
+	}
+	c.SetPageMargins(pageMargin[0], pageMargin[1], pageMargin[2], pageMargin[3])
+	// NOTE(review): y uses pageMargin[3]; confirm against the argument order
+	// of SetPageMargins that this is the intended edge.
+	img.SetPos(pageMargin[0], pageMargin[3])
+	return nil
+}
+
+// setSize picks the page size for img and applies it to c. With DefaultSize
+// the page is the image plus margins; otherwise a named paper size is used and
+// the image is optionally scaled to the usable height or width.
+func setSize(img *creator.Image, c *creator.Creator) error {
+	if size == DefaultSize {
+		w := img.Width() + pageMargin[0] + pageMargin[1]
+		h := img.Height() + pageMargin[2] + pageMargin[3]
+		pageSize = creator.PageSize{w, h}
+	} else {
+		switch size {
+		case "A4":
+			pageSize = creator.PageSizeA4
+		case "A3":
+			pageSize = creator.PageSizeA3
+		case "Legal":
+			pageSize = creator.PageSizeLegal
+		case "Letter":
+			pageSize = creator.PageSizeLetter
+		default:
+			return fmt.Errorf("error: -s|--size MUST be one of A4, A3, Legal or Letter. %s given", size)
+		}
+		sizeHasSet = true
+
+		if scaleH {
+			img.ScaleToHeight(pageSize[1] - (pageMargin[2] + pageMargin[3]))
+		} else if scaleW {
+			img.ScaleToWidth(pageSize[0] - (pageMargin[0] + pageMargin[1]))
+		}
+	}
+
+	if landscape {
+		c.SetPageSize(creator.PageSize{pageSize[1], pageSize[0]})
+	} else {
+		c.SetPageSize(pageSize)
+	}
+	return nil
+}
+
+// setEncoding installs a DCT encoder for JPEG images; other formats are left
+// untouched. It MUST run before the image is resized, because the encoder
+// dimensions must match the raw image data dimensions.
+func setEncoding(img *creator.Image, s source) {
+	if s.mime != "image/jpeg" {
+		return
+	}
+	encoder := core.NewDCTEncoder()
+	encoder.Quality = JPEGQuality
+	encoder.Width = int(img.Width())
+	encoder.Height = int(img.Height())
+	img.SetEncoder(encoder)
+}
diff --git a/internal/pdfmerge/pdf.go b/internal/pdfmerge/pdf.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/pdf.go
@@ -0,0 +1,85 @@
+package pdfmerge
+
+import (
+	"errors"
+	"io"
+	"os"
+
+	"pdfmerge/internal/pdf/creator"
+	pdf "pdfmerge/internal/pdf/model"
+)
+
+// PDFSource is a PDF file, optionally restricted to selected pages.
+type PDFSource struct {
+	source
+}
+
+// MergeTo appends the selected pages of the PDF (all pages when none were
+// selected) to c.
+func (s PDFSource) MergeTo(c *creator.Creator) error {
+	f, err := os.Open(s.path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	return addPdfPages(f, s.pages, c)
+}
+
+// getReader opens a PDF reader on rs. Encrypted documents are accepted only
+// if they can be decrypted with the empty password.
+func getReader(rs io.ReadSeeker) (*pdf.PdfReader, error) {
+	pdfReader, err := pdf.NewPdfReader(rs)
+	if err != nil {
+		return nil, err
+	}
+
+	isEncrypted, err := pdfReader.IsEncrypted()
+	if err != nil {
+		return nil, err
+	}
+	if !isEncrypted {
+		return pdfReader, nil
+	}
+
+	auth, err := pdfReader.Decrypt([]byte(""))
+	if err != nil {
+		return nil, err
+	}
+	if !auth {
+		return nil, errors.New("cannot merge encrypted, password protected document")
+	}
+	return pdfReader, nil
+}
+
+// addPdfPages copies the given 1-based pages of file into c. An empty pages
+// slice selects every page in document order.
+func addPdfPages(file *os.File, pages []int, c *creator.Creator) error {
+	pdfReader, err := getReader(file)
+	if err != nil {
+		return err
+	}
+
+	if len(pages) == 0 {
+		numPages, err := pdfReader.GetNumPages()
+		if err != nil {
+			return err
+		}
+		pages = make([]int, numPages)
+		for i := range pages {
+			pages[i] = i + 1
+		}
+	}
+
+	for _, pageNo := range pages {
+		page, err := pdfReader.GetPage(pageNo)
+		if err != nil {
+			return err
+		}
+		// Fail fast: a later successful AddPage must not mask this error.
+		if err := c.AddPage(page); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/internal/pdfmerge/source.go b/internal/pdfmerge/source.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/source.go
@@ -0,0 +1,138 @@
+package pdfmerge
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"pdfmerge/internal/pdf/creator"
+)
+
+// Mergeable is anything that can append its pages to a creator.Creator.
+type Mergeable interface {
+	MergeTo(c *creator.Creator) error
+}
+
+// source describes one input file: where it is, what it was detected as, and
+// which pages were requested (empty means all).
+type source struct {
+	path, sourceType, mime, ext string
+	pages                       []int
+}
+
+// NewSource builds a Mergeable from an input argument: a directory, a file
+// path, or "path~1,2,3" to select pages of a file.
+func NewSource(input string) (Mergeable, error) {
+	// Prefer the input as a literal path so names containing "~" still work;
+	// only when that does not exist is the last "~" treated as the separator
+	// between path and page list.
+	path, pageSpec, hasPages := input, "", false
+	if _, err := os.Stat(input); err != nil {
+		if i := strings.LastIndex(input, "~"); i >= 0 {
+			path, pageSpec, hasPages = input[:i], input[i+1:], true
+		}
+	}
+
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil, err
+	}
+
+	switch mode := info.Mode(); {
+	case mode.IsDir():
+		return getMergeableDir(path)
+	case mode.IsRegular():
+		var pages []int
+		if hasPages {
+			if pages, err = parsePageNums(pageSpec); err != nil {
+				return nil, err
+			}
+		}
+		return getMergeableFile(path, pages)
+	default:
+		// Returning a nil Mergeable with a nil error would make callers panic.
+		return nil, fmt.Errorf("not a regular file or directory: %s", path)
+	}
+}
+
+// getMergeableDir scans path and returns a DirSource holding its files.
+func getMergeableDir(path string) (Mergeable, error) {
+	dir := DirSource{path: path}
+	if err := dir.scanMergeables(); err != nil {
+		return nil, err
+	}
+	return dir, nil
+}
+
+// getMergeableFile sniffs the file at path and wraps it in the matching
+// source type (ImgSource or PDFSource).
+func getMergeableFile(path string, pages []int) (Mergeable, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read source file %s: %w", path, err)
+	}
+	defer f.Close()
+
+	mime, err := getMimeType(f)
+	if err != nil {
+		return nil, fmt.Errorf("error in getting mime type of file %s: %w", path, err)
+	}
+
+	ext := filepath.Ext(path)
+	sourceType, err := getFileType(mime, ext)
+	if err != nil {
+		return nil, fmt.Errorf("error : %w (%s)", err, path)
+	}
+
+	src := source{path: path, sourceType: sourceType, mime: mime, ext: ext, pages: pages}
+	switch sourceType {
+	case "image":
+		return ImgSource{src}, nil
+	case "pdf":
+		return PDFSource{src}, nil
+	default:
+		return nil, fmt.Errorf("unsupported source type %q (%s)", sourceType, path)
+	}
+}
+
+// getFileType maps a sniffed MIME type and a file extension to "pdf" or
+// "image". The extension is only consulted, case-insensitively, when the
+// sniffed type is "application/octet-stream".
+func getFileType(mime, ext string) (string, error) {
+	pdfExts := []string{".pdf"}
+	imgExts := []string{".jpg", ".jpeg", ".gif", ".png", ".tiff", ".tif"}
+	ext = strings.ToLower(ext)
+
+	switch {
+	case mime == "application/pdf":
+		return "pdf", nil
+	case strings.HasPrefix(mime, "image/"):
+		// HasPrefix, unlike mime[:6], cannot panic on a short string.
+		return "image", nil
+	case mime == "application/octet-stream" && in_array(ext, pdfExts):
+		return "pdf", nil
+	case mime == "application/octet-stream" && in_array(ext, imgExts):
+		return "image", nil
+	}
+
+	return "", errors.New("file type not acceptable")
+}
+
+// parsePageNums converts a comma separated list such as "1,2,5" into page
+// numbers. It returns an error instead of terminating the process so that
+// callers stay in control.
+func parsePageNums(pagesInput string) ([]int, error) {
+	fields := strings.Split(pagesInput, ",")
+	pages := make([]int, 0, len(fields))
+	for _, field := range fields {
+		pageNo, err := strconv.Atoi(strings.TrimSpace(field))
+		if err != nil {
+			return nil, fmt.Errorf("invalid format %q! Example of a file input with page numbers: path/to/abc.pdf~1,2,3,5,6", pagesInput)
+		}
+		pages = append(pages, pageNo)
+	}
+	return pages, nil
+}
diff --git a/internal/pdfmerge/util.go b/internal/pdfmerge/util.go
new file mode 100644
--- /dev/null
+++ b/internal/pdfmerge/util.go
@@ -0,0 +1,35 @@
+package pdfmerge
+
+import (
+	"errors"
+	"io"
+	"net/http"
+	"os"
+	"slices"
+)
+
+// in_array reports whether val occurs in array.
+func in_array[T comparable](val T, array []T) bool {
+	return at_array(val, array) != -1
+}
+
+// at_array returns the index of the first occurrence of val in array, or -1
+// if it is absent.
+func at_array[T comparable](val T, array []T) int {
+	return slices.Index(array, val)
+}
+
+// getMimeType sniffs the content type of file from its leading bytes.
+func getMimeType(file *os.File) (string, error) {
+	// http.DetectContentType considers at most the first 512 bytes.
+	buffer := make([]byte, 512)
+	n, err := io.ReadFull(file, buffer)
+	// A file shorter than the buffer is not an error; anything else is.
+	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+		return "", err
+	}
+
+	// Pass only the bytes actually read so zero padding cannot skew the
+	// result. Unrecognised content yields "application/octet-stream".
+	return http.DetectContentType(buffer[:n]), nil
+}
diff --git a/pdfmerge.go b/pdfmerge.go
new file mode 100644
--- /dev/null
+++ b/pdfmerge.go
@@ -0,0 +1,27 @@
+// Package pdfmerge merges PDF files, images and directories of both into a
+// single PDF document.
+package pdfmerge
+
+import (
+	"fmt"
+
+	"pdfmerge/internal/pdf/creator"
+	merge "pdfmerge/internal/pdfmerge"
+)
+
+// Pdfmerge merges every input in inputpath, in order, into one PDF written to
+// outputpath. Each input uses the syntax accepted by merge.NewSource.
+func Pdfmerge(inputpath []string, outputpath string) error {
+	c := creator.New()
+
+	for _, arg := range inputpath {
+		source, err := merge.NewSource(arg)
+		if err != nil {
+			return err
+		}
+		if err := source.MergeTo(c); err != nil {
+			return fmt.Errorf("error: %w (%s)", err, arg)
+		}
+	}
+	return c.WriteToFile(outputpath)
+}
diff --git a/pdfmerge_test.go b/pdfmerge_test.go
new file mode 100644
--- /dev/null
+++ b/pdfmerge_test.go
@@ -0,0 +1,25 @@
+package pdfmerge
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestMerge merges the local ./dst fixture directory. The directory is
+// git-ignored, so the test is skipped when it is not present.
+func TestMerge(t *testing.T) {
+	const fixture = "./dst"
+	if _, err := os.Stat(fixture); err != nil {
+		t.Skipf("fixture %s not available: %v", fixture, err)
+	}
+
+	// Write into a temp dir so the test leaves nothing behind.
+	out := filepath.Join(t.TempDir(), "output.pdf")
+	if err := Pdfmerge([]string{fixture}, out); err != nil {
+		t.Fatalf("Pdfmerge: %v", err)
+	}
+	if _, err := os.Stat(out); err != nil {
+		t.Fatalf("output not written: %v", err)
+	}
+}