package contentstream

import (
	"bytes"
	"errors"
	"fmt"
	"io"

	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)

// ContentStreamInlineImage is a representation of an inline image in a content stream:
// everything between the BI and EI operands.
// ContentStreamInlineImage implements the core.PdfObject interface although strictly it is not a PDF object.
type ContentStreamInlineImage struct {
	BitsPerComponent core.PdfObject
	ColorSpace       core.PdfObject
	Decode           core.PdfObject
	DecodeParms      core.PdfObject
	Filter           core.PdfObject
	Height           core.PdfObject
	ImageMask        core.PdfObject
	Intent           core.PdfObject
	Interpolate      core.PdfObject
	Width            core.PdfObject
	stream           []byte
}

// NewInlineImageFromImage makes a new content stream inline image object from an image.
// The image data is encoded with encoder; a nil encoder selects the raw encoder.
// An error is returned when the image does not have 1, 3 or 4 color components or
// when encoding fails.
func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*ContentStreamInlineImage, error) {
	if encoder == nil {
		encoder = core.NewRawEncoder()
	}

	inlineImage := ContentStreamInlineImage{}
	switch img.ColorComponents {
	case 1:
		inlineImage.ColorSpace = core.MakeName("G") // G short for DeviceGray
	case 3:
		inlineImage.ColorSpace = core.MakeName("RGB") // RGB short for DeviceRGB
	case 4:
		inlineImage.ColorSpace = core.MakeName("CMYK") // CMYK short for DeviceCMYK
	default:
		common.Log.Debug("invalid number of color components for inline image: %d", img.ColorComponents)
		return nil, errors.New("invalid number of color components")
	}
	inlineImage.BitsPerComponent = core.MakeInteger(img.BitsPerComponent)
	inlineImage.Width = core.MakeInteger(img.Width)
	inlineImage.Height = core.MakeInteger(img.Height)

	encoded, err := encoder.EncodeBytes(img.Data)
	if err != nil {
		return nil, err
	}
	inlineImage.stream = encoded

	// The raw encoding is the default and is not written out as a filter.
	filterName := encoder.GetFilterName()
	if filterName != core.StreamEncodingFilterNameRaw {
		inlineImage.Filter = core.MakeName(filterName)
	}
	// XXX/FIXME: Add decode params?

	return &inlineImage, nil
}

// writeParams writes every parameter that is set to buf, one per line, in the form
// "<prefix><abbreviated key> <value>\n". The order is fixed: BPC, CS, D, DP, F, H, IM,
// Intent, I, W.
func (si *ContentStreamInlineImage) writeParams(buf *bytes.Buffer, prefix string) {
	params := []struct {
		key   string
		value core.PdfObject
	}{
		{"BPC", si.BitsPerComponent},
		{"CS", si.ColorSpace},
		{"D", si.Decode},
		{"DP", si.DecodeParms},
		{"F", si.Filter},
		{"H", si.Height},
		{"IM", si.ImageMask},
		{"Intent", si.Intent},
		{"I", si.Interpolate},
		{"W", si.Width},
	}
	for _, p := range params {
		if p.value != nil {
			buf.WriteString(prefix + p.key + " " + p.value.DefaultWriteString() + "\n")
		}
	}
}

// String returns a human readable summary of the inline image: the length of the image
// data followed by one "- KEY value" line for each parameter that is set.
func (si *ContentStreamInlineImage) String() string {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "InlineImage(len=%d)\n", len(si.stream))
	si.writeParams(&buf, "- ")
	return buf.String()
}

// DefaultWriteString returns the content stream representation of the inline image:
// the parameters that are set, followed by "ID ", the image data and "\nEI\n".
func (si *ContentStreamInlineImage) DefaultWriteString() string {
	var output bytes.Buffer

	// We do not start with "BI" as that is the operand and is written out separately.
	// Write out the parameters.
	si.writeParams(&output, "/")

	output.WriteString("ID ")
	output.Write(si.stream)
	output.WriteString("\nEI\n")

	return output.String()
}

// GetColorSpace returns the colorspace of the inline image. A missing colorspace is
// treated as DeviceGray. An array colorspace is resolved as an indexed colorspace. A name
// that is not one of the device colorspaces (or their abbreviations) is looked up in the
// colorspaces of resources; resources may be nil, in which case such names are unknown.
func (si *ContentStreamInlineImage) GetColorSpace(resources *model.PdfPageResources) (model.PdfColorspace, error) {
	if si.ColorSpace == nil {
		// Default.
		common.Log.Debug("Inline image not having specified colorspace, assuming Gray")
		return model.NewPdfColorspaceDeviceGray(), nil
	}

	// If is an array, then could be an indexed colorspace.
	if arr, isArr := si.ColorSpace.(*core.PdfObjectArray); isArr {
		return newIndexedColorspaceFromPdfObject(arr)
	}

	name, ok := si.ColorSpace.(*core.PdfObjectName)
	if !ok {
		common.Log.Debug("error: Invalid object type (%T;%+v)", si.ColorSpace, si.ColorSpace)
		return nil, errors.New("type check error")
	}

	switch *name {
	case "G", "DeviceGray":
		return model.NewPdfColorspaceDeviceGray(), nil
	case "RGB", "DeviceRGB":
		return model.NewPdfColorspaceDeviceRGB(), nil
	case "CMYK", "DeviceCMYK":
		return model.NewPdfColorspaceDeviceCMYK(), nil
	case "I", "Indexed":
		return nil, errors.New("unsupported Index colorspace")
	default:
		// Can also refer to a name in the PDF page resources...
		// Guard against nil resources so that a lookup without page resources fails
		// with an error rather than a nil pointer dereference.
		if resources != nil && resources.ColorSpace != nil {
			if cs, has := resources.ColorSpace.Colorspaces[string(*name)]; has {
				return cs, nil
			}
		}
		common.Log.Debug("error, unsupported inline image colorspace: %s", *name)
		return nil, errors.New("unknown colorspace")
	}
}

// GetEncoder returns the stream encoder for the inline image, as created by
// newEncoderFromInlineImage.
func (si *ContentStreamInlineImage) GetEncoder() (core.StreamEncoder, error) {
	return newEncoderFromInlineImage(si)
}

// IsMask reports whether the inline image is an image mask.
// The image mask entry in the image dictionary specifies that the image data shall be used as a stencil
// mask for painting in the current color. The mask data is 1bpc, grayscale.
// A missing ImageMask entry means false; a non-boolean entry is an error.
func (si *ContentStreamInlineImage) IsMask() (bool, error) {
	if si.ImageMask == nil {
		return false, nil
	}

	imMask, ok := si.ImageMask.(*core.PdfObjectBool)
	if !ok {
		common.Log.Debug("Image mask not a boolean")
		return false, errors.New("invalid object type")
	}
	return bool(*imMask), nil
}

// ToImage exports the inline image to a model.Image which can be transformed or exported easily.
// Page resources are needed to look up colorspace information.
// Width and Height are required; bits per component defaults to 8 and the number of color
// components to 1 when not specified. Image masks are always 1 bit per component with
// 1 color component.
func (si *ContentStreamInlineImage) ToImage(resources *model.PdfPageResources) (*model.Image, error) {
	// Decode the imaging data if encoded.
	encoder, err := newEncoderFromInlineImage(si)
	if err != nil {
		return nil, err
	}
	common.Log.Trace("encoder: %+v %T", encoder, encoder)
	common.Log.Trace("inline image: %+v", si)

	decoded, err := encoder.DecodeBytes(si.stream)
	if err != nil {
		return nil, err
	}

	image := &model.Image{}

	// Height.
	if si.Height == nil {
		return nil, errors.New("height attribute missing")
	}
	height, ok := si.Height.(*core.PdfObjectInteger)
	if !ok {
		return nil, errors.New("invalid height")
	}
	image.Height = int64(*height)

	// Width.
	if si.Width == nil {
		return nil, errors.New("width attribute missing")
	}
	width, ok := si.Width.(*core.PdfObjectInteger)
	if !ok {
		return nil, errors.New("invalid width")
	}
	image.Width = int64(*width)

	// Image mask?
	isMask, err := si.IsMask()
	if err != nil {
		return nil, err
	}

	if isMask {
		// Masks are grayscale 1bpc.
		image.BitsPerComponent = 1
		image.ColorComponents = 1
	} else {
		// BPC.
		if si.BitsPerComponent == nil {
			common.Log.Debug("Inline Bits per component missing - assuming 8")
			image.BitsPerComponent = 8
		} else {
			bpc, ok := si.BitsPerComponent.(*core.PdfObjectInteger)
			if !ok {
				common.Log.Debug("error invalid bits per component value, type %T", si.BitsPerComponent)
				return nil, errors.New("BPC Type error")
			}
			image.BitsPerComponent = int64(*bpc)
		}

		// Color components.
		if si.ColorSpace != nil {
			cs, err := si.GetColorSpace(resources)
			if err != nil {
				return nil, err
			}
			image.ColorComponents = cs.GetNumComponents()
		} else {
			// Default gray if not specified.
			common.Log.Debug("Inline Image colorspace not specified - assuming 1 color component")
			image.ColorComponents = 1
		}
	}

	image.Data = decoded

	return image, nil
}

// ParseInlineImage parses an inline image from a content stream, reading both its properties
// and its binary data.
// When called, "BI" has already been read from the stream. This function
// finishes reading through "EI" and then returns the ContentStreamInlineImage.
func (s *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, error) {
	// Reading parameters.
	im := ContentStreamInlineImage{}

	for {
		s.skipSpaces()
		obj, err, isOperand := s.parseObject()
		if err != nil {
			return nil, err
		}

		if isOperand {
			operand, ok := obj.(*core.PdfObjectString)
			if !ok {
				return nil, fmt.Errorf("failed to read inline image - invalid operand")
			}

			switch *operand {
			case "EI":
				// Image fully defined
				common.Log.Trace("Inline image finished...")
				return &im, nil
			case "ID":
				// Inline image data.
				// Should get a single space (0x20) followed by the data and then EI.
				common.Log.Trace("ID start")
				data, err := s.readInlineImageData()
				if err != nil {
					return nil, err
				}
				im.stream = data
				// Exit point.
				return &im, nil
			}

			// Any other operand is skipped and parsing continues with the next object.
			continue
		}

		// Not an operand.. Read key value properties..
		param, ok := obj.(*core.PdfObjectName)
		if !ok {
			common.Log.Debug("invalid inline image property (expecting name) - %T", obj)
			return nil, fmt.Errorf("invalid inline image property (expecting name) - %T", obj)
		}

		valueObj, err, valueIsOperand := s.parseObject()
		if err != nil {
			return nil, err
		}
		if valueIsOperand {
			return nil, fmt.Errorf("not expecting an operand")
		}

		// From 8.9.7 "Inline Images" p. 223 (PDF32000_2008):
		// The key-value pairs appearing between the BI and ID operators are analogous to those in the dictionary
		// portion of an image XObject (though the syntax is different).
		// Table 93 shows the entries that are valid for an inline image, all of which shall have the same meanings
		// as in a stream dictionary (see Table 5) or an image dictionary (see Table 89).
		// Entries other than those listed shall be ignored; in particular, the Type, Subtype, and Length
		// entries normally found in a stream or image dictionary are unnecessary.
		// For convenience, the abbreviations shown in the table may be used in place of the fully spelled-out keys.
		// Table 94 shows additional abbreviations that can be used for the names of colour spaces and filters.
		switch *param {
		case "BPC", "BitsPerComponent":
			im.BitsPerComponent = valueObj
		case "CS", "ColorSpace":
			im.ColorSpace = valueObj
		case "D", "Decode":
			im.Decode = valueObj
		case "DP", "DecodeParms":
			im.DecodeParms = valueObj
		case "F", "Filter":
			im.Filter = valueObj
		case "H", "Height":
			im.Height = valueObj
		case "IM", "ImageMask":
			im.ImageMask = valueObj
		case "Intent":
			im.Intent = valueObj
		case "I", "Interpolate":
			im.Interpolate = valueObj
		case "W", "Width":
			im.Width = valueObj
		default:
			// NOTE(review): the specification text quoted above says unlisted entries
			// shall be ignored, whereas this rejects them. Kept as is so that callers
			// relying on the error are not affected - confirm the intended behaviour.
			return nil, fmt.Errorf("unknown inline image parameter %s", *param)
		}
	}
}

// readInlineImageData reads the binary data of an inline image. It is called right after
// the ID operand has been read and consumes the stream through the terminating EI.
//
// Unfortunately there is no good way to know how many bytes to read since it
// depends on the Filter and encoding etc.
// Therefore we simply read until we find "<ws>EI<ws>" where <ws> is whitespace
// although of course that could be a part of the data (even if unlikely).
// "<ws>EI" directly followed by the end of the stream is accepted as a terminator too.
func (s *ContentStreamParser) readInlineImageData() ([]byte, error) {
	// Skip the space if its there.
	b, err := s.reader.Peek(1)
	if err != nil {
		return nil, err
	}
	if core.IsWhiteSpace(b[0]) {
		if _, err := s.reader.Discard(1); err != nil {
			return nil, err
		}
	}

	// States of the terminator scanner.
	const (
		stateData       = iota // In image data, no partial terminator match.
		stateWhitespace        // Seen whitespace.
		stateE                 // Seen whitespace followed by 'E'.
		stateEI                // Seen whitespace followed by "EI".
	)

	// resume gives the state to continue in after a partial match failed on byte c:
	// a whitespace byte may itself be the start of the real terminator.
	resume := func(c byte) int {
		if core.IsWhiteSpace(c) {
			return stateWhitespace
		}
		return stateData
	}

	data := []byte{}
	var held []byte // Bytes of a partial terminator match, not yet known to be data.
	state := stateData

scan:
	for {
		c, err := s.reader.ReadByte()
		if err != nil {
			if state == stateEI && errors.Is(err, io.EOF) {
				// The stream ends right after "EI": image data finished.
				break scan
			}
			common.Log.Debug("Unable to find end of image EI in inline image data")
			return nil, err
		}

		switch state {
		case stateData:
			if core.IsWhiteSpace(c) {
				held = append(held[:0], c)
				state = stateWhitespace
			} else {
				data = append(data, c)
			}
		case stateWhitespace:
			held = append(held, c)
			if c == 'E' {
				state = stateE
			} else {
				data = append(data, held...)
				held = held[:0] // Clear.
				// Need an extra check to decide if we fall back to data or whitespace.
				state = resume(c)
			}
		case stateE:
			held = append(held, c)
			if c == 'I' {
				state = stateEI
			} else {
				data = append(data, held...)
				held = held[:0] // Clear.
				// Same check as above: in "<ws>E<ws>EI<ws>" the second whitespace
				// starts the real terminator.
				state = resume(c)
			}
		case stateEI:
			if core.IsWhiteSpace(c) {
				// image data finished.
				break scan
			}
			// Seems like "EI" was part of the data.
			held = append(held, c)
			data = append(data, held...)
			held = held[:0] // Clear.
			state = stateData
		}
	}

	if len(data) > 100 {
		common.Log.Trace("Image stream (%d): % x ...", len(data), data[:100])
	} else {
		common.Log.Trace("Image stream (%d): % x", len(data), data)
	}

	return data, nil
}