package model import ( "bufio" "crypto/md5" "crypto/rand" "errors" "fmt" "io" "math" "os" "time" "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common" "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core" ) var pdfCreator = "" func getPdfCreator() string { if len(pdfCreator) > 0 { return pdfCreator } // Return default. return "" } func SetPdfCreator(creator string) { pdfCreator = creator } type PdfWriter struct { root *core.PdfIndirectObject pages *core.PdfIndirectObject objects []core.PdfObject objectsMap map[core.PdfObject]bool // Quick lookup table. writer *bufio.Writer outlineTree *PdfOutlineTreeNode catalog *core.PdfObjectDictionary infoObj *core.PdfIndirectObject // Encryption crypter *core.PdfCrypt encryptDict *core.PdfObjectDictionary encryptObj *core.PdfIndirectObject ids *core.PdfObjectArray // PDF version majorVersion int minorVersion int // Objects to be followed up on prior to writing. // These are objects that are added and reference objects that are not included // for writing. // The map stores the object and the dictionary it is contained in. // Only way so we can access the dictionary entry later. pendingObjects map[core.PdfObject]*core.PdfObjectDictionary // Forms. acroForm *PdfAcroForm } func NewPdfWriter() PdfWriter { w := PdfWriter{} w.objectsMap = map[core.PdfObject]bool{} w.objects = []core.PdfObject{} w.pendingObjects = map[core.PdfObject]*core.PdfObjectDictionary{} // PDF Version. Can be changed if using more advanced features in PDF. // By default it is set to 1.3. w.majorVersion = 1 w.minorVersion = 3 // Creation info. infoDict := core.MakeDict() infoDict.Set("Producer", core.MakeString("")) infoDict.Set("Creator", core.MakeString(getPdfCreator())) infoObj := core.PdfIndirectObject{} infoObj.PdfObject = infoDict w.infoObj = &infoObj w.addObject(&infoObj) // Root catalog. catalog := core.PdfIndirectObject{} catalogDict := core.MakeDict() catalogDict.Set("Type", core.MakeName("Catalog")) catalog.PdfObject = catalogDict w.root = &catalog w.addObject(&catalog) // Pages. pages := core.PdfIndirectObject{} pagedict := core.MakeDict() pagedict.Set("Type", core.MakeName("Pages")) kids := core.PdfObjectArray{} pagedict.Set("Kids", &kids) pagedict.Set("Count", core.MakeInteger(0)) pages.PdfObject = pagedict w.pages = &pages w.addObject(&pages) catalogDict.Set("Pages", &pages) w.catalog = catalogDict common.Log.Trace("Catalog %s", catalog) return w } // Set the PDF version of the output file. func (pw *PdfWriter) SetVersion(majorVersion, minorVersion int) { pw.majorVersion = majorVersion pw.minorVersion = minorVersion } // Set the optional content properties. func (pw *PdfWriter) SetOCProperties(ocProperties core.PdfObject) error { dict := pw.catalog if ocProperties != nil { common.Log.Trace("Setting OC Properties...") dict.Set("OCProperties", ocProperties) // Any risk of infinite loops? pw.addObjects(ocProperties) } return nil } func (pw *PdfWriter) hasObject(obj core.PdfObject) bool { // Check if already added. for _, o := range pw.objects { // GH: May perform better to use a hash map to check if added? if o == obj { return true } } return false } // Adds the object to list of objects and returns true if the obj was // not already added. // Returns false if the object was previously added. func (pw *PdfWriter) addObject(obj core.PdfObject) bool { hasObj := pw.hasObject(obj) if !hasObj { pw.objects = append(pw.objects, obj) return true } return false } func (pw *PdfWriter) addObjects(obj core.PdfObject) error { common.Log.Trace("Adding objects!") if io, isIndirectObj := obj.(*core.PdfIndirectObject); isIndirectObj { common.Log.Trace("Indirect") common.Log.Trace("- %s (%p)", obj, io) common.Log.Trace("- %s", io.PdfObject) if pw.addObject(io) { err := pw.addObjects(io.PdfObject) if err != nil { return err } } return nil } if so, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj { common.Log.Trace("Stream") common.Log.Trace("- %s %p", obj, obj) if pw.addObject(so) { err := pw.addObjects(so.PdfObjectDictionary) if err != nil { return err } } return nil } if dict, isDict := obj.(*core.PdfObjectDictionary); isDict { common.Log.Trace("Dict") common.Log.Trace("- %s", obj) for _, k := range dict.Keys() { v := dict.Get(k) common.Log.Trace("Key %s", k) if k != "Parent" { err := pw.addObjects(v) if err != nil { return err } } else { if _, parentIsNull := dict.Get("Parent").(*core.PdfObjectNull); parentIsNull { // Parent is null. We can ignore it. continue } if hasObj := pw.hasObject(v); !hasObj { common.Log.Debug("Parent obj is missing!! %T %p %v", v, v, v) pw.pendingObjects[v] = dict // Although it is missing at this point, it could be added later... } // How to handle the parent? Make sure it is present? if parentObj, parentIsRef := dict.Get("Parent").(*core.PdfObjectReference); parentIsRef { // Parent is a reference. Means we can drop it? // Could refer to somewhere outside of the scope of the output doc. // Should be done by the reader already. // -> ERROR. common.Log.Debug("error: Parent is a reference object - Cannot be in writer (needs to be resolved)") return fmt.Errorf("parent is a reference object - Cannot be in writer (needs to be resolved) - %s", parentObj) } } } return nil } if arr, isArray := obj.(*core.PdfObjectArray); isArray { common.Log.Trace("Array") common.Log.Trace("- %s", obj) if arr == nil { return errors.New("array is nil") } for _, v := range *arr { err := pw.addObjects(v) if err != nil { return err } } return nil } if _, isReference := obj.(*core.PdfObjectReference); isReference { // Should never be a reference, should already be resolved. common.Log.Debug("error: Cannot be a reference!") return errors.New("reference not allowed") } return nil } // Add a page to the PDF file. The new page should be an indirect // object. func (pw *PdfWriter) AddPage(page *PdfPage) error { obj := page.ToPdfObject() common.Log.Trace("==========") common.Log.Trace("Appending to page list %T", obj) pageObj, ok := obj.(*core.PdfIndirectObject) if !ok { return errors.New("page should be an indirect object") } common.Log.Trace("%s", pageObj) common.Log.Trace("%s", pageObj.PdfObject) pDict, ok := pageObj.PdfObject.(*core.PdfObjectDictionary) if !ok { return errors.New("page object should be a dictionary") } otype, ok := pDict.Get("Type").(*core.PdfObjectName) if !ok { return fmt.Errorf("page should have a Type key with a value of type name (%T)", pDict.Get("Type")) } if *otype != "Page" { return errors.New("type != Page (Required)") } // Copy inherited fields if missing. inheritedFields := []core.PdfObjectName{"Resources", "MediaBox", "CropBox", "Rotate"} parent, hasParent := pDict.Get("Parent").(*core.PdfIndirectObject) common.Log.Trace("Page Parent: %T (%v)", pDict.Get("Parent"), hasParent) for hasParent { common.Log.Trace("Page Parent: %T", parent) parentDict, ok := parent.PdfObject.(*core.PdfObjectDictionary) if !ok { return errors.New("invalid Parent object") } for _, field := range inheritedFields { common.Log.Trace("Field %s", field) if pDict.Get(field) != nil { common.Log.Trace("- page has already") continue } if obj := parentDict.Get(field); obj != nil { // Parent has the field. Inherit, pass to the new page. common.Log.Trace("Inheriting field %s", field) pDict.Set(field, obj) } } parent, hasParent = parentDict.Get("Parent").(*core.PdfIndirectObject) common.Log.Trace("Next parent: %T", parentDict.Get("Parent")) } common.Log.Trace("Traversal done") // Update the dictionary. // Reuses the input object, updating the fields. pDict.Set("Parent", pw.pages) pageObj.PdfObject = pDict // Add to Pages. pagesDict, ok := pw.pages.PdfObject.(*core.PdfObjectDictionary) if !ok { return errors.New("invalid Pages obj (not a dict)") } kids, ok := pagesDict.Get("Kids").(*core.PdfObjectArray) if !ok { return errors.New("invalid Pages Kids obj (not an array)") } *kids = append(*kids, pageObj) pageCount, ok := pagesDict.Get("Count").(*core.PdfObjectInteger) if !ok { return errors.New("invalid Pages Count object (not an integer)") } // Update the count. *pageCount = *pageCount + 1 pw.addObject(pageObj) // Traverse the page and record all object references. err := pw.addObjects(pDict) if err != nil { return err } return nil } // Add outlines to a PDF file. func (pw *PdfWriter) AddOutlineTree(outlineTree *PdfOutlineTreeNode) { pw.outlineTree = outlineTree } // Add Acroforms to a PDF file. Sets the specified form for writing. func (pw *PdfWriter) SetForms(form *PdfAcroForm) error { pw.acroForm = form return nil } // Write out an indirect / stream object. func (pw *PdfWriter) writeObject(num int, obj core.PdfObject) { common.Log.Trace("Write obj #%d\n", num) if pobj, isIndirect := obj.(*core.PdfIndirectObject); isIndirect { outStr := fmt.Sprintf("%d 0 obj\n", num) outStr += pobj.PdfObject.DefaultWriteString() outStr += "\nendobj\n" pw.writer.WriteString(outStr) return } // XXX/TODO: Add a default encoder if Filter not specified? // Still need to make sure is encrypted. if pobj, isStream := obj.(*core.PdfObjectStream); isStream { outStr := fmt.Sprintf("%d 0 obj\n", num) outStr += pobj.PdfObjectDictionary.DefaultWriteString() outStr += "\nstream\n" pw.writer.WriteString(outStr) pw.writer.Write(pobj.Stream) pw.writer.WriteString("\nendstream\nendobj\n") return } pw.writer.WriteString(obj.DefaultWriteString()) } // Update all the object numbers prior to writing. func (pw *PdfWriter) updateObjectNumbers() { // Update numbers for idx, obj := range pw.objects { if io, isIndirect := obj.(*core.PdfIndirectObject); isIndirect { io.ObjectNumber = int64(idx + 1) io.GenerationNumber = 0 } if so, isStream := obj.(*core.PdfObjectStream); isStream { so.ObjectNumber = int64(idx + 1) so.GenerationNumber = 0 } } } type EncryptOptions struct { Permissions core.AccessPermissions Algorithm EncryptionAlgorithm } // EncryptionAlgorithm is used in EncryptOptions to change the default algorithm used to encrypt the document. type EncryptionAlgorithm int const ( // RC4_128bit uses RC4 encryption (128 bit) RC4_128bit = EncryptionAlgorithm(iota) // AES_128bit uses AES encryption (128 bit, PDF 1.6) AES_128bit // AES_256bit uses AES encryption (256 bit, PDF 2.0) AES_256bit ) // Encrypt the output file with a specified user/owner password. func (pw *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptions) error { crypter := core.PdfCrypt{} pw.crypter = &crypter crypter.EncryptedObjects = map[core.PdfObject]bool{} crypter.CryptFilters = core.CryptFilters{} algo := RC4_128bit if options != nil { algo = options.Algorithm } var cf core.CryptFilter switch algo { case RC4_128bit: crypter.V = 2 crypter.R = 3 cf = core.NewCryptFilterV2(16) case AES_128bit: pw.SetVersion(1, 5) crypter.V = 4 crypter.R = 4 cf = core.NewCryptFilterAESV2() case AES_256bit: pw.SetVersion(2, 0) crypter.V = 5 crypter.R = 6 // TODO(dennwc): a way to set R=5? cf = core.NewCryptFilterAESV3() default: return fmt.Errorf("unsupported algorithm: %v", options.Algorithm) } crypter.Length = cf.Length * 8 const ( defaultFilter = core.StandardCryptFilter ) crypter.CryptFilters[defaultFilter] = cf if crypter.V >= 4 { crypter.StreamFilter = defaultFilter crypter.StringFilter = defaultFilter } // Set crypter.P = math.MaxUint32 crypter.EncryptMetadata = true if options != nil { crypter.P = int(options.Permissions.GetP()) } // Generate the encryption dictionary. ed := core.MakeDict() ed.Set("Filter", core.MakeName("Standard")) ed.Set("P", core.MakeInteger(int64(crypter.P))) ed.Set("V", core.MakeInteger(int64(crypter.V))) ed.Set("R", core.MakeInteger(int64(crypter.R))) ed.Set("Length", core.MakeInteger(int64(crypter.Length))) pw.encryptDict = ed // Prepare the ID object for the trailer. hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850))) id0 := core.PdfObjectString(hashcode[:]) b := make([]byte, 100) rand.Read(b) hashcode = md5.Sum(b) id1 := core.PdfObjectString(hashcode[:]) common.Log.Trace("Random b: % x", b) pw.ids = &core.PdfObjectArray{&id0, &id1} common.Log.Trace("Gen Id 0: % x", id0) // Generate encryption parameters if crypter.R < 5 { crypter.Id0 = string(id0) // Make the O and U objects. O, err := crypter.Alg3(userPass, ownerPass) if err != nil { common.Log.Debug("error: Error generating O for encryption (%s)", err) return err } crypter.O = []byte(O) common.Log.Trace("gen O: % x", O) U, key, err := crypter.Alg5(userPass) if err != nil { common.Log.Debug("error: Error generating O for encryption (%s)", err) return err } common.Log.Trace("gen U: % x", U) crypter.U = []byte(U) crypter.EncryptionKey = key ed.Set("O", &O) ed.Set("U", &U) } else { // R >= 5 err := crypter.GenerateParams(userPass, ownerPass) if err != nil { return err } ed.Set("O", core.MakeString(string(crypter.O))) ed.Set("U", core.MakeString(string(crypter.U))) ed.Set("OE", core.MakeString(string(crypter.OE))) ed.Set("UE", core.MakeString(string(crypter.UE))) ed.Set("EncryptMetadata", core.MakeBool(crypter.EncryptMetadata)) if crypter.R > 5 { ed.Set("Perms", core.MakeString(string(crypter.Perms))) } } if crypter.V >= 4 { if err := crypter.SaveCryptFilters(ed); err != nil { return err } } // Make an object to contain the encryption dictionary. io := core.MakeIndirectObject(ed) pw.encryptObj = io pw.addObject(io) return nil } // Write the pdf out. func (pw *PdfWriter) Write(ws io.WriteSeeker) error { // Outlines. if pw.outlineTree != nil { common.Log.Trace("OutlineTree: %+v", pw.outlineTree) outlines := pw.outlineTree.ToPdfObject() common.Log.Trace("Outlines: %+v (%T, p:%p)", outlines, outlines, outlines) pw.catalog.Set("Outlines", outlines) err := pw.addObjects(outlines) if err != nil { return err } } // Form fields. if pw.acroForm != nil { common.Log.Trace("Writing acro forms") indObj := pw.acroForm.ToPdfObject() common.Log.Trace("AcroForm: %+v", indObj) pw.catalog.Set("AcroForm", indObj) err := pw.addObjects(indObj) if err != nil { return err } } // Check pending objects prior to write. for pendingObj, pendingObjDict := range pw.pendingObjects { if !pw.hasObject(pendingObj) { common.Log.Debug("error Pending object %+v %T (%p) never added for writing", pendingObj, pendingObj, pendingObj) for _, key := range pendingObjDict.Keys() { val := pendingObjDict.Get(key) if val == pendingObj { common.Log.Debug("Pending object found! and replaced with null") pendingObjDict.Set(key, core.MakeNull()) break } } } } // Set version in the catalog. pw.catalog.Set("Version", core.MakeName(fmt.Sprintf("%d.%d", pw.majorVersion, pw.minorVersion))) w := bufio.NewWriter(ws) pw.writer = w w.WriteString(fmt.Sprintf("%%PDF-%d.%d\n", pw.majorVersion, pw.minorVersion)) w.WriteString("%âãÏÓ\n") w.Flush() pw.updateObjectNumbers() offsets := []int64{} // Write objects common.Log.Trace("Writing %d obj", len(pw.objects)) for idx, obj := range pw.objects { common.Log.Trace("Writing %d", idx) pw.writer.Flush() offset, _ := ws.Seek(0, os.SEEK_CUR) offsets = append(offsets, offset) // Encrypt prior to writing. // Encrypt dictionary should not be encrypted. if pw.crypter != nil && obj != pw.encryptObj { err := pw.crypter.Encrypt(obj, int64(idx+1), 0) if err != nil { common.Log.Debug("error: Failed encrypting (%s)", err) return err } } pw.writeObject(idx+1, obj) } w.Flush() xrefOffset, _ := ws.Seek(0, os.SEEK_CUR) // Write xref table. pw.writer.WriteString("xref\r\n") outStr := fmt.Sprintf("%d %d\r\n", 0, len(pw.objects)+1) pw.writer.WriteString(outStr) outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535) pw.writer.WriteString(outStr) for _, offset := range offsets { outStr = fmt.Sprintf("%.10d %.5d n\r\n", offset, 0) pw.writer.WriteString(outStr) } // Generate & write trailer trailer := core.MakeDict() trailer.Set("Info", pw.infoObj) trailer.Set("Root", pw.root) trailer.Set("Size", core.MakeInteger(int64(len(pw.objects)+1))) // If encrypted! if pw.crypter != nil { trailer.Set("Encrypt", pw.encryptObj) trailer.Set("ID", pw.ids) common.Log.Trace("Ids: %s", pw.ids) } pw.writer.WriteString("trailer\n") pw.writer.WriteString(trailer.DefaultWriteString()) pw.writer.WriteString("\n") // Make offset reference. outStr = fmt.Sprintf("startxref\n%d\n", xrefOffset) pw.writer.WriteString(outStr) pw.writer.WriteString("%%EOF\n") w.Flush() return nil }