fix wrong git ignore

2025-12-15 17:44:00 +01:00
parent ed9f31bb96
commit 8f313c00f0
126 changed files with 70589 additions and 1 deletions
--- a/internal/pdf/contentstream/inline-image.go
+++ b/internal/pdf/contentstream/inline-image.go
@@ -0,0 +1,456 @@
+package contentstream
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
+	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
+	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
+)
+
+// ContentStreamInlineImage is a representation of an inline image in a content stream:
+// everything between the BI and EI operators. It implements the core.PdfObject interface
+// although strictly it is not a PDF object.
+type ContentStreamInlineImage struct {
+	BitsPerComponent core.PdfObject // Value of the BPC / BitsPerComponent key.
+	ColorSpace       core.PdfObject // Value of the CS / ColorSpace key.
+	Decode           core.PdfObject // Value of the D / Decode key.
+	DecodeParms      core.PdfObject // Value of the DP / DecodeParms key.
+	Filter           core.PdfObject // Value of the F / Filter key.
+	Height           core.PdfObject // Value of the H / Height key.
+	ImageMask        core.PdfObject // Value of the IM / ImageMask key.
+	Intent           core.PdfObject // Value of the Intent key.
+	Interpolate      core.PdfObject // Value of the I / Interpolate key.
+	Width            core.PdfObject // Value of the W / Width key.
+	stream           []byte         // Image data as stored between ID and EI; decoded on demand by ToImage.
+}
+
+// Make a new content stream inline image object from an image.
+func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*ContentStreamInlineImage, error) {
+	if encoder == nil {
+		encoder = core.NewRawEncoder()
+	}
+
+	inlineImage := ContentStreamInlineImage{}
+	switch img.ColorComponents {
+	case 1:
+		inlineImage.ColorSpace = core.MakeName("G") // G short for DeviceGray
+	case 3:
+		inlineImage.ColorSpace = core.MakeName("RGB") // RGB short for DeviceRGB
+	case 4:
+		inlineImage.ColorSpace = core.MakeName("CMYK") // CMYK short for DeviceCMYK
+	default:
+		common.Log.Debug("invalid number of color components for inline image: %d", img.ColorComponents)
+		return nil, errors.New("invalid number of color components")
+	}
+
+	inlineImage.BitsPerComponent = core.MakeInteger(img.BitsPerComponent)
+	inlineImage.Width = core.MakeInteger(img.Width)
+	inlineImage.Height = core.MakeInteger(img.Height)
+
+	encoded, err := encoder.EncodeBytes(img.Data)
+	if err != nil {
+		return nil, err
+	}
+
+	inlineImage.stream = encoded
+
+	filterName := encoder.GetFilterName()
+	if filterName != core.StreamEncodingFilterNameRaw {
+		inlineImage.Filter = core.MakeName(filterName)
+	}
+	// XXX/FIXME: Add decode params?
+
+	return &inlineImage, nil
+}
+
+func (si *ContentStreamInlineImage) String() string {
+	s := fmt.Sprintf("InlineImage(len=%d)\n", len(si.stream))
+	if si.BitsPerComponent != nil {
+		s += "- BPC " + si.BitsPerComponent.DefaultWriteString() + "\n"
+	}
+	if si.ColorSpace != nil {
+		s += "- CS " + si.ColorSpace.DefaultWriteString() + "\n"
+	}
+	if si.Decode != nil {
+		s += "- D " + si.Decode.DefaultWriteString() + "\n"
+	}
+	if si.DecodeParms != nil {
+		s += "- DP " + si.DecodeParms.DefaultWriteString() + "\n"
+	}
+	if si.Filter != nil {
+		s += "- F " + si.Filter.DefaultWriteString() + "\n"
+	}
+	if si.Height != nil {
+		s += "- H " + si.Height.DefaultWriteString() + "\n"
+	}
+	if si.ImageMask != nil {
+		s += "- IM " + si.ImageMask.DefaultWriteString() + "\n"
+	}
+	if si.Intent != nil {
+		s += "- Intent " + si.Intent.DefaultWriteString() + "\n"
+	}
+	if si.Interpolate != nil {
+		s += "- I " + si.Interpolate.DefaultWriteString() + "\n"
+	}
+	if si.Width != nil {
+		s += "- W " + si.Width.DefaultWriteString() + "\n"
+	}
+	return s
+}
+
+// DefaultWriteString serializes the inline image as it appears in a content stream after
+// the BI operator: one "/KEY value" line per parameter that is set, then "ID ", the
+// stream data, and finally a newline followed by "EI".
+func (si *ContentStreamInlineImage) DefaultWriteString() string {
+	// We do not start with "BI" as that is the operand and is written out separately.
+	// Parameters are listed in the order in which they are written.
+	entries := []struct {
+		key   string
+		value core.PdfObject
+	}{
+		{"BPC", si.BitsPerComponent},
+		{"CS", si.ColorSpace},
+		{"D", si.Decode},
+		{"DP", si.DecodeParms},
+		{"F", si.Filter},
+		{"H", si.Height},
+		{"IM", si.ImageMask},
+		{"Intent", si.Intent},
+		{"I", si.Interpolate},
+		{"W", si.Width},
+	}
+
+	var out bytes.Buffer
+	for _, e := range entries {
+		if e.value != nil {
+			out.WriteString("/" + e.key + " " + e.value.DefaultWriteString() + "\n")
+		}
+	}
+
+	// Image data, delimited by the ID and EI operators.
+	out.WriteString("ID ")
+	out.Write(si.stream)
+	out.WriteString("\nEI\n")
+
+	return out.String()
+}
+
+// GetColorSpace resolves the colorspace of the inline image.
+//
+// A missing ColorSpace entry defaults to DeviceGray. An array entry is handed to
+// newIndexedColorspaceFromPdfObject. A name entry is matched against the device
+// colorspaces (in full or abbreviated form); any other name is looked up in the
+// colorspaces of the page resources.
+//
+// resources may be nil. In that case a name that is not a device colorspace is
+// reported as "unknown colorspace", exactly as if the resources had no matching entry.
+//
+// An error is returned when the entry is neither an array nor a name, when the name
+// is "I"/"Indexed" (unsupported in name form), or when the name cannot be resolved.
+func (s *ContentStreamInlineImage) GetColorSpace(resources *model.PdfPageResources) (model.PdfColorspace, error) {
+	if s.ColorSpace == nil {
+		// Default.
+		common.Log.Debug("Inline image not having specified colorspace, assuming Gray")
+		return model.NewPdfColorspaceDeviceGray(), nil
+	}
+
+	// If is an array, then could be an indexed colorspace.
+	if arr, isArr := s.ColorSpace.(*core.PdfObjectArray); isArr {
+		return newIndexedColorspaceFromPdfObject(arr)
+	}
+
+	name, ok := s.ColorSpace.(*core.PdfObjectName)
+	if !ok {
+		common.Log.Debug("error: Invalid object type (%T;%+v)", s.ColorSpace, s.ColorSpace)
+		return nil, errors.New("type check error")
+	}
+
+	switch *name {
+	case "G", "DeviceGray":
+		return model.NewPdfColorspaceDeviceGray(), nil
+	case "RGB", "DeviceRGB":
+		return model.NewPdfColorspaceDeviceRGB(), nil
+	case "CMYK", "DeviceCMYK":
+		return model.NewPdfColorspaceDeviceCMYK(), nil
+	case "I", "Indexed":
+		return nil, errors.New("unsupported Index colorspace")
+	default:
+		// Can also refer to a name in the PDF page resources. Both the resources and
+		// their colorspace table are optional, so check each before dereferencing.
+		if resources != nil && resources.ColorSpace != nil {
+			if cs, has := resources.ColorSpace.Colorspaces[string(*name)]; has {
+				return cs, nil
+			}
+		}
+
+		common.Log.Debug("error, unsupported inline image colorspace: %s", *name)
+		return nil, errors.New("unknown colorspace")
+	}
+}
+
+// GetEncoder returns the stream encoder for this inline image, as produced by
+// newEncoderFromInlineImage.
+func (si *ContentStreamInlineImage) GetEncoder() (core.StreamEncoder, error) {
+	return newEncoderFromInlineImage(si)
+}
+
+// IsMask reports whether the inline image is an image mask.
+// The image mask entry in the image dictionary specifies that the image data shall be used as a stencil
+// mask for painting in the current color. The mask data is 1bpc, grayscale.
+// A missing ImageMask entry yields false; an entry that is not a boolean yields an error.
+func (s *ContentStreamInlineImage) IsMask() (bool, error) {
+	if s.ImageMask == nil {
+		return false, nil
+	}
+
+	flag, isBool := s.ImageMask.(*core.PdfObjectBool)
+	if !isBool {
+		common.Log.Debug("Image mask not a boolean")
+		return false, errors.New("invalid object type")
+	}
+
+	return bool(*flag), nil
+}
+
+// Export the inline image to Image which can be transformed or exported easily.
+// Page resources are needed to look up colorspace information.
+func (si *ContentStreamInlineImage) ToImage(resources *model.PdfPageResources) (*model.Image, error) {
+	// Decode the imaging data if encoded.
+	encoder, err := newEncoderFromInlineImage(si)
+	if err != nil {
+		return nil, err
+	}
+	common.Log.Trace("encoder: %+v %T", encoder, encoder)
+	common.Log.Trace("inline image: %+v", si)
+
+	decoded, err := encoder.DecodeBytes(si.stream)
+	if err != nil {
+		return nil, err
+	}
+
+	image := &model.Image{}
+
+	// Height.
+	if si.Height == nil {
+		return nil, errors.New("height attribute missing")
+	}
+	height, ok := si.Height.(*core.PdfObjectInteger)
+	if !ok {
+		return nil, errors.New("invalid height")
+	}
+	image.Height = int64(*height)
+
+	// Width.
+	if si.Width == nil {
+		return nil, errors.New("width attribute missing")
+	}
+	width, ok := si.Width.(*core.PdfObjectInteger)
+	if !ok {
+		return nil, errors.New("invalid width")
+	}
+	image.Width = int64(*width)
+
+	// Image mask?
+	isMask, err := si.IsMask()
+	if err != nil {
+		return nil, err
+	}
+
+	if isMask {
+		// Masks are grayscale 1bpc.
+		image.BitsPerComponent = 1
+		image.ColorComponents = 1
+	} else {
+		// BPC.
+		if si.BitsPerComponent == nil {
+			common.Log.Debug("Inline Bits per component missing - assuming 8")
+			image.BitsPerComponent = 8
+		} else {
+			bpc, ok := si.BitsPerComponent.(*core.PdfObjectInteger)
+			if !ok {
+				common.Log.Debug("error invalid bits per component value, type %T", si.BitsPerComponent)
+				return nil, errors.New("BPC Type error")
+			}
+			image.BitsPerComponent = int64(*bpc)
+		}
+
+		// Color components.
+		if si.ColorSpace != nil {
+			cs, err := si.GetColorSpace(resources)
+			if err != nil {
+				return nil, err
+			}
+			image.ColorComponents = cs.GetNumComponents()
+		} else {
+			// Default gray if not specified.
+			common.Log.Debug("Inline Image colorspace not specified - assuming 1 color component")
+			image.ColorComponents = 1
+		}
+	}
+
+	image.Data = decoded
+
+	return image, nil
+}
+
+// ParseInlineImage parses an inline image from a content stream, reading both its
+// properties and its binary data.
+// When called, "BI" has already been read from the stream.  This function
+// finishes reading through "EI" and then returns the ContentStreamInlineImage.
+//
+// An error is returned when a property key is not a name, when a property value is an
+// operand, when an operand cannot be read, or when the input ends before the
+// terminating EI is found. Property keys that are not valid for inline images are
+// skipped together with their value.
+func (s *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, error) {
+	// Reading parameters.
+	im := ContentStreamInlineImage{}
+
+	for {
+		s.skipSpaces()
+		obj, err, isOperand := s.parseObject()
+		if err != nil {
+			return nil, err
+		}
+
+		if !isOperand {
+			// Not an operand.. Read key value properties..
+			param, ok := obj.(*core.PdfObjectName)
+			if !ok {
+				common.Log.Debug("invalid inline image property (expecting name) - %T", obj)
+				return nil, fmt.Errorf("invalid inline image property (expecting name) - %T", obj)
+			}
+
+			valueObj, err, valueIsOperand := s.parseObject()
+			if err != nil {
+				return nil, err
+			}
+			if valueIsOperand {
+				return nil, fmt.Errorf("not expecting an operand")
+			}
+
+			// From 8.9.7 "Inline Images" p. 223 (PDF32000_2008):
+			// The key-value pairs appearing between the BI and ID operators are analogous to those in the dictionary
+			// portion of an image XObject (though the syntax is different).
+			// Table 93 shows the entries that are valid for an inline image, all of which shall have the same meanings
+			// as in a stream dictionary (see Table 5) or an image dictionary (see Table 89).
+			// Entries other than those listed shall be ignored; in particular, the Type, Subtype, and Length
+			// entries normally found in a stream or image dictionary are unnecessary.
+			// For convenience, the abbreviations shown in the table may be used in place of the fully spelled-out keys.
+			// Table 94 shows additional abbreviations that can be used for the names of colour spaces and filters.
+
+			switch *param {
+			case "BPC", "BitsPerComponent":
+				im.BitsPerComponent = valueObj
+			case "CS", "ColorSpace":
+				im.ColorSpace = valueObj
+			case "D", "Decode":
+				im.Decode = valueObj
+			case "DP", "DecodeParms":
+				im.DecodeParms = valueObj
+			case "F", "Filter":
+				im.Filter = valueObj
+			case "H", "Height":
+				im.Height = valueObj
+			case "IM", "ImageMask":
+				im.ImageMask = valueObj
+			case "Intent":
+				im.Intent = valueObj
+			case "I", "Interpolate":
+				im.Interpolate = valueObj
+			case "W", "Width":
+				im.Width = valueObj
+			default:
+				// Entries other than those listed shall be ignored (see above). The value
+				// has already been consumed, so parsing continues with the next key.
+				common.Log.Debug("ignoring unknown inline image parameter %s", *param)
+			}
+			continue
+		}
+
+		operand, ok := obj.(*core.PdfObjectString)
+		if !ok {
+			return nil, fmt.Errorf("failed to read inline image - invalid operand")
+		}
+
+		// Operands other than EI and ID are skipped and the loop keeps reading.
+		switch *operand {
+		case "EI":
+			// Image fully defined
+			common.Log.Trace("Inline image finished...")
+			return &im, nil
+		case "ID":
+			// Inline image data.
+			// Should get a single space (0x20) followed by the data and then EI.
+			common.Log.Trace("ID start")
+
+			// Skip the space if its there.
+			b, err := s.reader.Peek(1)
+			if err != nil {
+				return nil, err
+			}
+			if core.IsWhiteSpace(b[0]) {
+				if _, err := s.reader.Discard(1); err != nil {
+					return nil, err
+				}
+			}
+
+			data, err := s.readInlineImageData()
+			if err != nil {
+				return nil, err
+			}
+			im.stream = data
+
+			// image data finished.
+			if len(im.stream) > 100 {
+				common.Log.Trace("Image stream (%d): % x ...", len(im.stream), im.stream[:100])
+			} else {
+				common.Log.Trace("Image stream (%d): % x", len(im.stream), im.stream)
+			}
+			// Exit point.
+			return &im, nil
+		}
+	}
+}
+
+// readInlineImageData reads inline image data up to and including the terminating
+// "<ws>EI<ws>" sequence, where <ws> is a whitespace byte, and returns the data
+// without that terminator.
+//
+// Unfortunately there is no good way to know how many bytes to read since it
+// depends on the Filter and encoding etc. Therefore we simply read until we find
+// "<ws>EI<ws>", although of course that could be a part of the data (even if unlikely).
+func (s *ContentStreamParser) readInlineImageData() ([]byte, error) {
+	// Scanner states: how much of the "<ws>EI<ws>" terminator has been matched so far.
+	const (
+		inData  = iota // Nothing matched.
+		seenWS         // Matched "<ws>".
+		seenWSE        // Matched "<ws>E".
+		seenEI         // Matched "<ws>EI".
+	)
+
+	data := []byte{}
+	// pending holds the bytes of a partially matched terminator. They are only
+	// committed to data once the match is known to have failed.
+	var pending []byte
+	state := inData
+
+	for {
+		c, err := s.reader.ReadByte()
+		if err != nil {
+			common.Log.Debug("Unable to find end of image EI in inline image data")
+			return nil, err
+		}
+
+		// Advance when c is the next byte of the terminator.
+		switch {
+		case state == seenWS && c == 'E':
+			pending = append(pending, c)
+			state = seenWSE
+			continue
+		case state == seenWSE && c == 'I':
+			pending = append(pending, c)
+			state = seenEI
+			continue
+		case state == seenEI && core.IsWhiteSpace(c):
+			return data, nil
+		}
+
+		// c does not continue the terminator, so whatever was buffered is image data.
+		data = append(data, pending...)
+		pending = pending[:0]
+
+		if core.IsWhiteSpace(c) {
+			// A whitespace byte always starts a new candidate terminator, whichever
+			// state the scanner was in; otherwise "<ws>E<ws>EI<ws>" would be missed.
+			pending = append(pending, c)
+			state = seenWS
+		} else {
+			data = append(data, c)
+			state = inData
+		}
+	}
+}