package model import ( "errors" "fmt" "strings" "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common" "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core" ) // PDF page object (7.7.3.3 - Table 30). type PdfPage struct { Parent core.PdfObject LastModified *PdfDate Resources *PdfPageResources CropBox *PdfRectangle MediaBox *PdfRectangle BleedBox *PdfRectangle TrimBox *PdfRectangle ArtBox *PdfRectangle BoxColorInfo core.PdfObject Contents core.PdfObject Rotate *int64 Group core.PdfObject Thumb core.PdfObject B core.PdfObject Dur core.PdfObject Trans core.PdfObject AA core.PdfObject Metadata core.PdfObject PieceInfo core.PdfObject StructParents core.PdfObject ID core.PdfObject PZ core.PdfObject SeparationInfo core.PdfObject Tabs core.PdfObject TemplateInstantiated core.PdfObject PresSteps core.PdfObject UserUnit core.PdfObject VP core.PdfObject Annotations []*PdfAnnotation // Primitive container. pageDict *core.PdfObjectDictionary primitive *core.PdfIndirectObject } func NewPdfPage() *PdfPage { page := PdfPage{} page.pageDict = core.MakeDict() container := core.PdfIndirectObject{} container.PdfObject = page.pageDict page.primitive = &container return &page } func (pp *PdfPage) setContainer(container *core.PdfIndirectObject) { container.PdfObject = pp.pageDict pp.primitive = container } func (pp *PdfPage) Duplicate() *PdfPage { dup := *pp dup.pageDict = core.MakeDict() dup.primitive = core.MakeIndirectObject(dup.pageDict) return &dup } // Build a PdfPage based on the underlying dictionary. // Used in loading existing PDF files. // Note that a new container is created (indirect object). func (reader *PdfReader) newPdfPageFromDict(p *core.PdfObjectDictionary) (*PdfPage, error) { page := NewPdfPage() page.pageDict = p //XXX? d := *p pType, ok := d.Get("Type").(*core.PdfObjectName) if !ok { return nil, errors.New("missing/Invalid Page dictionary Type") } if *pType != "Page" { return nil, errors.New("page dictionary Type != Page") } if obj := d.Get("Parent"); obj != nil { page.Parent = obj } if obj := d.Get("LastModified"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } strObj, ok := core.TraceToDirectObject(obj).(*core.PdfObjectString) if !ok { return nil, errors.New("page dictionary LastModified != string") } lastmod, err := NewPdfDate(string(*strObj)) if err != nil { return nil, err } page.LastModified = &lastmod } if obj := d.Get("Resources"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } dict, ok := core.TraceToDirectObject(obj).(*core.PdfObjectDictionary) if !ok { return nil, fmt.Errorf("invalid resource dictionary (%T)", obj) } page.Resources, err = NewPdfPageResourcesFromDict(dict) if err != nil { return nil, err } } else { // If Resources not explicitly defined, look up the tree (Parent objects) using // the getResources() function. Resources should always be accessible. resources, err := page.getResources() if err != nil { return nil, err } if resources == nil { resources = NewPdfPageResources() } page.Resources = resources } if obj := d.Get("MediaBox"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } boxArr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) if !ok { return nil, errors.New("page MediaBox not an array") } page.MediaBox, err = NewPdfRectangle(*boxArr) if err != nil { return nil, err } } if obj := d.Get("CropBox"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } boxArr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) if !ok { return nil, errors.New("page CropBox not an array") } page.CropBox, err = NewPdfRectangle(*boxArr) if err != nil { return nil, err } } if obj := d.Get("BleedBox"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } boxArr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) if !ok { return nil, errors.New("page BleedBox not an array") } page.BleedBox, err = NewPdfRectangle(*boxArr) if err != nil { return nil, err } } if obj := d.Get("TrimBox"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } boxArr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) if !ok { return nil, errors.New("page TrimBox not an array") } page.TrimBox, err = NewPdfRectangle(*boxArr) if err != nil { return nil, err } } if obj := d.Get("ArtBox"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } boxArr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) if !ok { return nil, errors.New("page ArtBox not an array") } page.ArtBox, err = NewPdfRectangle(*boxArr) if err != nil { return nil, err } } if obj := d.Get("BoxColorInfo"); obj != nil { page.BoxColorInfo = obj } if obj := d.Get("Contents"); obj != nil { page.Contents = obj } if obj := d.Get("Rotate"); obj != nil { var err error obj, err = reader.traceToObject(obj) if err != nil { return nil, err } iObj, ok := core.TraceToDirectObject(obj).(*core.PdfObjectInteger) if !ok { return nil, errors.New("invalid Page Rotate object") } iVal := int64(*iObj) page.Rotate = &iVal } if obj := d.Get("Group"); obj != nil { page.Group = obj } if obj := d.Get("Thumb"); obj != nil { page.Thumb = obj } if obj := d.Get("B"); obj != nil { page.B = obj } if obj := d.Get("Dur"); obj != nil { page.Dur = obj } if obj := d.Get("Trans"); obj != nil { page.Trans = obj } //if obj := d.Get("Annots"); obj != nil { // page.Annots = obj //} if obj := d.Get("AA"); obj != nil { page.AA = obj } if obj := d.Get("Metadata"); obj != nil { page.Metadata = obj } if obj := d.Get("PieceInfo"); obj != nil { page.PieceInfo = obj } if obj := d.Get("StructParents"); obj != nil { page.StructParents = obj } if obj := d.Get("ID"); obj != nil { page.ID = obj } if obj := d.Get("PZ"); obj != nil { page.PZ = obj } if obj := d.Get("SeparationInfo"); obj != nil { page.SeparationInfo = obj } if obj := d.Get("Tabs"); obj != nil { page.Tabs = obj } if obj := d.Get("TemplateInstantiated"); obj != nil { page.TemplateInstantiated = obj } if obj := d.Get("PresSteps"); obj != nil { page.PresSteps = obj } if obj := d.Get("UserUnit"); obj != nil { page.UserUnit = obj } if obj := d.Get("VP"); obj != nil { page.VP = obj } var err error page.Annotations, err = reader.LoadAnnotations(&d) if err != nil { return nil, err } return page, nil } func (reader *PdfReader) LoadAnnotations(d *core.PdfObjectDictionary) ([]*PdfAnnotation, error) { annotsObj := d.Get("Annots") if annotsObj == nil { return nil, nil } var err error annotsObj, err = reader.traceToObject(annotsObj) if err != nil { return nil, err } annotsArr, ok := core.TraceToDirectObject(annotsObj).(*core.PdfObjectArray) if !ok { return nil, fmt.Errorf("annots not an array") } annotations := []*PdfAnnotation{} for _, obj := range *annotsArr { obj, err = reader.traceToObject(obj) if err != nil { return nil, err } // Technically all annotation dictionaries should be inside indirect objects. // In reality, sometimes the annotation dictionary is inline within the Annots array. if _, isNull := obj.(*core.PdfObjectNull); isNull { // Can safely ignore. continue } annotDict, isDict := obj.(*core.PdfObjectDictionary) indirectObj, isIndirect := obj.(*core.PdfIndirectObject) if isDict { // Create a container; indirect object; around the dictionary. indirectObj = &core.PdfIndirectObject{} indirectObj.PdfObject = annotDict } else { if !isIndirect { return nil, fmt.Errorf("annotation not in an indirect object") } } annot, err := reader.newPdfAnnotationFromIndirectObject(indirectObj) if err != nil { return nil, err } annotations = append(annotations, annot) } return annotations, nil } // Get the inheritable media box value, either from the page // or a higher up page/pages struct. func (pp *PdfPage) GetMediaBox() (*PdfRectangle, error) { if pp.MediaBox != nil { return pp.MediaBox, nil } node := pp.Parent for node != nil { dictObj, ok := node.(*core.PdfIndirectObject) if !ok { return nil, errors.New("invalid parent object") } dict, ok := dictObj.PdfObject.(*core.PdfObjectDictionary) if !ok { return nil, errors.New("invalid parent objects dictionary") } if obj := dict.Get("MediaBox"); obj != nil { arr, ok := obj.(*core.PdfObjectArray) if !ok { return nil, errors.New("invalid media box") } rect, err := NewPdfRectangle(*arr) if err != nil { return nil, err } return rect, nil } node = dict.Get("Parent") } return nil, errors.New("media box not defined") } // Get the inheritable resources, either from the page or or a higher up page/pages struct. func (pp *PdfPage) getResources() (*PdfPageResources, error) { if pp.Resources != nil { return pp.Resources, nil } node := pp.Parent for node != nil { dictObj, ok := node.(*core.PdfIndirectObject) if !ok { return nil, errors.New("invalid parent object") } dict, ok := dictObj.PdfObject.(*core.PdfObjectDictionary) if !ok { return nil, errors.New("invalid parent objects dictionary") } if obj := dict.Get("Resources"); obj != nil { prDict, ok := core.TraceToDirectObject(obj).(*core.PdfObjectDictionary) if !ok { return nil, errors.New("invalid resource dict") } resources, err := NewPdfPageResourcesFromDict(prDict) if err != nil { return nil, err } return resources, nil } // Keep moving up the tree... node = dict.Get("Parent") } // No resources defined... return nil, nil } // Convert the Page to a PDF object dictionary. func (pp *PdfPage) GetPageDict() *core.PdfObjectDictionary { p := pp.pageDict p.Set("Type", core.MakeName("Page")) p.Set("Parent", pp.Parent) if pp.LastModified != nil { p.Set("LastModified", pp.LastModified.ToPdfObject()) } if pp.Resources != nil { p.Set("Resources", pp.Resources.ToPdfObject()) } if pp.CropBox != nil { p.Set("CropBox", pp.CropBox.ToPdfObject()) } if pp.MediaBox != nil { p.Set("MediaBox", pp.MediaBox.ToPdfObject()) } if pp.BleedBox != nil { p.Set("BleedBox", pp.BleedBox.ToPdfObject()) } if pp.TrimBox != nil { p.Set("TrimBox", pp.TrimBox.ToPdfObject()) } if pp.ArtBox != nil { p.Set("ArtBox", pp.ArtBox.ToPdfObject()) } p.SetIfNotNil("BoxColorInfo", pp.BoxColorInfo) p.SetIfNotNil("Contents", pp.Contents) if pp.Rotate != nil { p.Set("Rotate", core.MakeInteger(*pp.Rotate)) } p.SetIfNotNil("Group", pp.Group) p.SetIfNotNil("Thumb", pp.Thumb) p.SetIfNotNil("B", pp.B) p.SetIfNotNil("Dur", pp.Dur) p.SetIfNotNil("Trans", pp.Trans) p.SetIfNotNil("AA", pp.AA) p.SetIfNotNil("Metadata", pp.Metadata) p.SetIfNotNil("PieceInfo", pp.PieceInfo) p.SetIfNotNil("StructParents", pp.StructParents) p.SetIfNotNil("ID", pp.ID) p.SetIfNotNil("PZ", pp.PZ) p.SetIfNotNil("SeparationInfo", pp.SeparationInfo) p.SetIfNotNil("Tabs", pp.Tabs) p.SetIfNotNil("TemplateInstantiated", pp.TemplateInstantiated) p.SetIfNotNil("PresSteps", pp.PresSteps) p.SetIfNotNil("UserUnit", pp.UserUnit) p.SetIfNotNil("VP", pp.VP) if pp.Annotations != nil { arr := core.PdfObjectArray{} for _, annot := range pp.Annotations { if subannot := annot.GetContext(); subannot != nil { arr = append(arr, subannot.ToPdfObject()) } else { // Generic annotation dict (without subtype). arr = append(arr, annot.ToPdfObject()) } } p.Set("Annots", &arr) } return p } // Get the page object as an indirect objects. Wraps the Page // dictionary into an indirect object. func (pp *PdfPage) GetPageAsIndirectObject() *core.PdfIndirectObject { return pp.primitive } func (pp *PdfPage) GetContainingPdfObject() core.PdfObject { return pp.primitive } func (pp *PdfPage) ToPdfObject() core.PdfObject { container := pp.primitive pp.GetPageDict() // update. return container } // Add an image to the XObject resources. func (pp *PdfPage) AddImageResource(name core.PdfObjectName, ximg *XObjectImage) error { var xresDict *core.PdfObjectDictionary if pp.Resources.XObject == nil { xresDict = core.MakeDict() pp.Resources.XObject = xresDict } else { var ok bool xresDict, ok = (pp.Resources.XObject).(*core.PdfObjectDictionary) if !ok { return errors.New("invalid xres dict type") } } // Make a stream object container. xresDict.Set(name, ximg.ToPdfObject()) return nil } // Check if has XObject resource by name. func (pp *PdfPage) HasXObjectByName(name core.PdfObjectName) bool { xresDict, has := pp.Resources.XObject.(*core.PdfObjectDictionary) if !has { return false } if obj := xresDict.Get(name); obj != nil { return true } else { return false } } // Get XObject by name. func (pp *PdfPage) GetXObjectByName(name core.PdfObjectName) (core.PdfObject, bool) { xresDict, has := pp.Resources.XObject.(*core.PdfObjectDictionary) if !has { return nil, false } if obj := xresDict.Get(name); obj != nil { return obj, true } else { return nil, false } } // Check if has font resource by name. func (pp *PdfPage) HasFontByName(name core.PdfObjectName) bool { fontDict, has := pp.Resources.Font.(*core.PdfObjectDictionary) if !has { return false } if obj := fontDict.Get(name); obj != nil { return true } else { return false } } // Check if ExtGState name is available. func (pp *PdfPage) HasExtGState(name core.PdfObjectName) bool { if pp.Resources == nil { return false } if pp.Resources.ExtGState == nil { return false } egsDict, ok := core.TraceToDirectObject(pp.Resources.ExtGState).(*core.PdfObjectDictionary) if !ok { common.Log.Debug("Expected ExtGState dictionary is not a dictionary: %v", core.TraceToDirectObject(pp.Resources.ExtGState)) return false } // Update the dictionary. obj := egsDict.Get(name) has := obj != nil return has } // Add a graphics state to the XObject resources. func (pp *PdfPage) AddExtGState(name core.PdfObjectName, egs *core.PdfObjectDictionary) error { if pp.Resources == nil { //this.Resources = &PdfPageResources{} pp.Resources = NewPdfPageResources() } if pp.Resources.ExtGState == nil { pp.Resources.ExtGState = core.MakeDict() } egsDict, ok := core.TraceToDirectObject(pp.Resources.ExtGState).(*core.PdfObjectDictionary) if !ok { common.Log.Debug("Expected ExtGState dictionary is not a dictionary: %v", core.TraceToDirectObject(pp.Resources.ExtGState)) return errors.New("type check error") } egsDict.Set(name, egs) return nil } // Add a font dictionary to the Font resources. func (pp *PdfPage) AddFont(name core.PdfObjectName, font core.PdfObject) error { if pp.Resources == nil { pp.Resources = NewPdfPageResources() } if pp.Resources.Font == nil { pp.Resources.Font = core.MakeDict() } fontDict, ok := core.TraceToDirectObject(pp.Resources.Font).(*core.PdfObjectDictionary) if !ok { common.Log.Debug("Expected font dictionary is not a dictionary: %v", core.TraceToDirectObject(pp.Resources.Font)) return errors.New("type check error") } // Update the dictionary. fontDict.Set(name, font) return nil } type WatermarkImageOptions struct { Alpha float64 FitToWidth bool PreserveAspectRatio bool } // Add a watermark to the page. func (pp *PdfPage) AddWatermarkImage(ximg *XObjectImage, opt WatermarkImageOptions) error { // Page dimensions. bbox, err := pp.GetMediaBox() if err != nil { return err } pWidth := bbox.Urx - bbox.Llx pHeight := bbox.Ury - bbox.Lly wWidth := float64(*ximg.Width) xOffset := (float64(pWidth) - float64(wWidth)) / 2 if opt.FitToWidth { wWidth = pWidth xOffset = 0 } wHeight := pHeight yOffset := float64(0) if opt.PreserveAspectRatio { wHeight = wWidth * float64(*ximg.Height) / float64(*ximg.Width) yOffset = (pHeight - wHeight) / 2 } if pp.Resources == nil { pp.Resources = NewPdfPageResources() } // Find available image name for this page. i := 0 imgName := core.PdfObjectName(fmt.Sprintf("Imw%d", i)) for pp.Resources.HasXObjectByName(imgName) { i++ imgName = core.PdfObjectName(fmt.Sprintf("Imw%d", i)) } err = pp.AddImageResource(imgName, ximg) if err != nil { return err } i = 0 gsName := core.PdfObjectName(fmt.Sprintf("GS%d", i)) for pp.HasExtGState(gsName) { i++ gsName = core.PdfObjectName(fmt.Sprintf("GS%d", i)) } gs0 := core.MakeDict() gs0.Set("BM", core.MakeName("Normal")) gs0.Set("CA", core.MakeFloat(opt.Alpha)) gs0.Set("ca", core.MakeFloat(opt.Alpha)) err = pp.AddExtGState(gsName, gs0) if err != nil { return err } contentStr := fmt.Sprintf("q\n"+ "/%s gs\n"+ "%.0f 0 0 %.0f %.4f %.4f cm\n"+ "/%s Do\n"+ "Q", gsName, wWidth, wHeight, xOffset, yOffset, imgName) pp.AddContentStreamByString(contentStr) return nil } // Add content stream by string. Puts the content string into a stream // object and points the content stream towards it. func (pp *PdfPage) AddContentStreamByString(contentStr string) { stream := core.PdfObjectStream{} sDict := core.MakeDict() stream.PdfObjectDictionary = sDict sDict.Set("Length", core.MakeInteger(int64(len(contentStr)))) stream.Stream = []byte(contentStr) if pp.Contents == nil { // If not set, place it directly. pp.Contents = &stream } else if contArray, isArray := core.TraceToDirectObject(pp.Contents).(*core.PdfObjectArray); isArray { // If an array of content streams, append it. *contArray = append(*contArray, &stream) } else { // Only 1 element in place. Wrap inside a new array and add the new one. contArray := core.PdfObjectArray{} contArray = append(contArray, pp.Contents) contArray = append(contArray, &stream) pp.Contents = &contArray } } // Set the content streams based on a string array. Will make 1 object stream // for each string and reference from the page Contents. Each stream will be // encoded using the encoding specified by the StreamEncoder, if empty, will // use identity encoding (raw data). func (pp *PdfPage) SetContentStreams(cStreams []string, encoder core.StreamEncoder) error { if len(cStreams) == 0 { pp.Contents = nil return nil } // If encoding is not set, use default raw encoder. if encoder == nil { encoder = core.NewRawEncoder() } streamObjs := []*core.PdfObjectStream{} for _, cStream := range cStreams { stream := &core.PdfObjectStream{} // Make a new stream dict based on the encoding parameters. sDict := encoder.MakeStreamDict() encoded, err := encoder.EncodeBytes([]byte(cStream)) if err != nil { return err } sDict.Set("Length", core.MakeInteger(int64(len(encoded)))) stream.PdfObjectDictionary = sDict stream.Stream = []byte(encoded) streamObjs = append(streamObjs, stream) } // Set the page contents. // Point directly to the object stream if only one, or embed in an array. if len(streamObjs) == 1 { pp.Contents = streamObjs[0] } else { contArray := core.PdfObjectArray{} for _, streamObj := range streamObjs { contArray = append(contArray, streamObj) } pp.Contents = &contArray } return nil } func getContentStreamAsString(cstreamObj core.PdfObject) (string, error) { if cstream, ok := core.TraceToDirectObject(cstreamObj).(*core.PdfObjectString); ok { return string(*cstream), nil } if cstream, ok := core.TraceToDirectObject(cstreamObj).(*core.PdfObjectStream); ok { buf, err := core.DecodeStream(cstream) if err != nil { return "", err } return string(buf), nil } return "", fmt.Errorf("invalid content stream object holder (%T)", core.TraceToDirectObject(cstreamObj)) } // Get Content Stream as an array of strings. func (pp *PdfPage) GetContentStreams() ([]string, error) { if pp.Contents == nil { return nil, nil } contents := core.TraceToDirectObject(pp.Contents) if contArray, isArray := contents.(*core.PdfObjectArray); isArray { // If an array of content streams, append it. cstreams := []string{} for _, cstreamObj := range *contArray { cstreamStr, err := getContentStreamAsString(cstreamObj) if err != nil { return nil, err } cstreams = append(cstreams, cstreamStr) } return cstreams, nil } else { // Only 1 element in place. Wrap inside a new array and add the new one. cstreamStr, err := getContentStreamAsString(contents) if err != nil { return nil, err } cstreams := []string{cstreamStr} return cstreams, nil } } // Get all the content streams for a page as one string. func (pp *PdfPage) GetAllContentStreams() (string, error) { cstreams, err := pp.GetContentStreams() if err != nil { return "", err } return strings.Join(cstreams, " "), nil } // Needs to have matching name and colorspace map entry. The Names define the order. type PdfPageResourcesColorspaces struct { Names []string Colorspaces map[string]PdfColorspace container *core.PdfIndirectObject } func NewPdfPageResourcesColorspaces() *PdfPageResourcesColorspaces { colorspaces := &PdfPageResourcesColorspaces{} colorspaces.Names = []string{} colorspaces.Colorspaces = map[string]PdfColorspace{} colorspaces.container = &core.PdfIndirectObject{} return colorspaces } // Set the colorspace corresponding to key. Add to Names if not set. func (pp *PdfPageResourcesColorspaces) Set(key core.PdfObjectName, val PdfColorspace) { if _, has := pp.Colorspaces[string(key)]; !has { pp.Names = append(pp.Names, string(key)) } pp.Colorspaces[string(key)] = val } func newPdfPageResourcesColorspacesFromPdfObject(obj core.PdfObject) (*PdfPageResourcesColorspaces, error) { colorspaces := &PdfPageResourcesColorspaces{} if indObj, isIndirect := obj.(*core.PdfIndirectObject); isIndirect { colorspaces.container = indObj obj = indObj.PdfObject } dict, ok := obj.(*core.PdfObjectDictionary) if !ok { return nil, errors.New("CS attribute type error") } colorspaces.Names = []string{} colorspaces.Colorspaces = map[string]PdfColorspace{} for _, csName := range dict.Keys() { csObj := dict.Get(csName) colorspaces.Names = append(colorspaces.Names, string(csName)) cs, err := NewPdfColorspaceFromPdfObject(csObj) if err != nil { return nil, err } colorspaces.Colorspaces[string(csName)] = cs } return colorspaces, nil } func (pp *PdfPageResourcesColorspaces) ToPdfObject() core.PdfObject { dict := core.MakeDict() for _, csName := range pp.Names { dict.Set(core.PdfObjectName(csName), pp.Colorspaces[csName].ToPdfObject()) } if pp.container != nil { pp.container.PdfObject = dict return pp.container } return dict }