fix wrong git ignore

This commit is contained in:
Adrian Zürcher
2025-12-15 17:44:00 +01:00
parent ed9f31bb96
commit 8f313c00f0
126 changed files with 70589 additions and 1 deletions

View File

@@ -0,0 +1,7 @@
package contentstream
import "errors"
// ErrInvalidOperand is the sentinel error for an invalid content stream operand.
var ErrInvalidOperand = errors.New("invalid operand")

View File

@@ -0,0 +1,197 @@
package contentstream
import (
"bytes"
"fmt"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
)
// ContentStreamOperation is a single content stream instruction: the operator
// keyword (Operand) together with the parameter objects (Params) that are
// written in front of it.
type ContentStreamOperation struct {
Params []core.PdfObject
Operand string
}
// ContentStreamOperations is an ordered list of content stream operations.
type ContentStreamOperations []*ContentStreamOperation
// isWrapped reports whether every operation in the list sits inside a balanced
// q ... Q pair, i.e. nothing runs at nesting depth zero and the depth returns
// to zero at the end.
//
// A list with fewer than two operations is never wrapped. A "Q" that would take
// the depth below zero (a restore without a matching save) makes the list
// unwrapped; previously such a list, e.g. "Q q", was reported as wrapped.
// Nil entries are skipped, matching how Bytes treats them.
func (s *ContentStreamOperations) isWrapped() bool {
	if len(*s) < 2 {
		return false
	}

	depth := 0
	for _, op := range *s {
		if op == nil {
			continue
		}
		switch op.Operand {
		case "q":
			depth++
		case "Q":
			depth--
			if depth < 0 {
				// Restore without a preceding save: not a proper wrapping.
				return false
			}
		default:
			if depth < 1 {
				// Operation found outside of any q ... Q pair.
				return false
			}
		}
	}

	// Should end at depth == 0.
	return depth == 0
}
// WrapIfNeeded encloses the whole operation list in q ... Q unless it is empty
// or already wrapped. A "q" is prepended and as many "Q" operations are
// appended as are required to bring the save/restore balance back to zero, so
// that content added afterwards starts from the default graphics state.
// The list is modified in place and the receiver is returned.
func (s *ContentStreamOperations) WrapIfNeeded() *ContentStreamOperations {
	if len(*s) == 0 || s.isWrapped() {
		// Nothing to do for empty or already wrapped content.
		return s
	}

	// Start at 1 to account for the leading "q" that is about to be prepended.
	balance := 1
	for _, op := range *s {
		if op.Operand == "q" {
			balance++
		} else if op.Operand == "Q" {
			balance--
		}
	}

	wrapped := append(ContentStreamOperations{{Operand: "q"}}, *s...)
	for ; balance > 0; balance-- {
		wrapped = append(wrapped, &ContentStreamOperation{Operand: "Q"})
	}

	*s = wrapped
	return s
}
// Bytes serializes the operations into content stream form, i.e. the kind that
// can be stored as a PDF stream or string. Each regular operation is written as
// its parameters separated by spaces, followed by the operand and a newline.
// Nil entries are skipped.
func (s *ContentStreamOperations) Bytes() []byte {
	var out bytes.Buffer

	for _, op := range *s {
		if op == nil {
			continue
		}

		if op.Operand == "BI" {
			// Inline image: operand first, then the image object itself.
			// NOTE(review): assumes a BI operation always carries at least one param.
			out.WriteString(op.Operand)
			out.WriteByte('\n')
			out.WriteString(op.Params[0].DefaultWriteString())
			continue
		}

		// Default form: "<param> <param> ... <operand>\n".
		for _, param := range op.Params {
			out.WriteString(param.DefaultWriteString())
			out.WriteByte(' ')
		}
		out.WriteString(op.Operand)
		out.WriteByte('\n')
	}

	return out.Bytes()
}
// ExtractText parses and extracts all text data in content streams and returns as a string.
// Does not take into account Encoding table, the output is simply the character codes.
//
// Line breaks are emitted for Td, TD and T* and when a Tm moves to a lower y
// position; a tab is emitted when a Tm moves to a larger x position. Strings
// shown with Tj/TJ are only collected between BT and ET. Returns an error if
// parsing fails or a Tj/TJ parameter has an unexpected type.
//
// Deprecated: More advanced text extraction is offered in package extractor with character encoding support.
func (s *ContentStreamParser) ExtractText() (string, error) {
	operations, err := s.Parse()
	if err != nil {
		return "", err
	}

	// A buffer avoids re-copying the accumulated text on every append.
	var buf bytes.Buffer
	inText := false
	xPos, yPos := float64(-1), float64(-1)

	for _, op := range *operations {
		switch op.Operand {
		case "BT":
			inText = true
		case "ET":
			inText = false
		case "Td", "TD", "T*":
			// Move to next line...
			buf.WriteByte('\n')
		case "Tm":
			if len(op.Params) != 6 {
				continue
			}
			x, ok := textOperandAsFloat(op.Params[4])
			if !ok {
				continue
			}
			y, ok := textOperandAsFloat(op.Params[5])
			if !ok {
				continue
			}
			if yPos == -1 {
				yPos = y
			} else if yPos > y {
				// Moved down the page: start a new line.
				buf.WriteByte('\n')
				xPos, yPos = x, y
				continue
			}
			if xPos == -1 {
				xPos = x
			} else if xPos < x {
				// Moved right on the same line: separate with a tab.
				buf.WriteByte('\t')
				xPos = x
			}
		case "TJ":
			if !inText || len(op.Params) < 1 {
				continue
			}
			paramList, ok := op.Params[0].(*core.PdfObjectArray)
			if !ok {
				return "", fmt.Errorf("invalid parameter type, no array (%T)", op.Params[0])
			}
			for _, obj := range *paramList {
				switch v := obj.(type) {
				case *core.PdfObjectString:
					buf.WriteString(string(*v))
				case *core.PdfObjectFloat:
					// A large negative displacement is treated as a word gap.
					if *v < -100 {
						buf.WriteByte(' ')
					}
				case *core.PdfObjectInteger:
					if *v < -100 {
						buf.WriteByte(' ')
					}
				}
			}
		case "Tj":
			if !inText || len(op.Params) < 1 {
				continue
			}
			param, ok := op.Params[0].(*core.PdfObjectString)
			if !ok {
				return "", fmt.Errorf("invalid parameter type, not string (%T)", op.Params[0])
			}
			buf.WriteString(string(*param))
		}
	}

	return buf.String(), nil
}

// textOperandAsFloat returns the numeric value of a float or integer PDF object.
// The boolean is false when obj is neither.
func textOperandAsFloat(obj core.PdfObject) (float64, bool) {
	switch v := obj.(type) {
	case *core.PdfObjectFloat:
		return float64(*v), true
	case *core.PdfObjectInteger:
		return float64(*v), true
	}
	return 0, false
}

View File

@@ -0,0 +1,613 @@
package contentstream
import (
"math"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
)
// ContentCreator builds a content stream by accumulating operations through
// its chainable Add_* methods.
type ContentCreator struct {
// operands holds the operations appended so far, in order.
operands ContentStreamOperations
}
// NewContentCreator returns a creator with an empty (non-nil) operation list.
func NewContentCreator() *ContentCreator {
	return &ContentCreator{operands: ContentStreamOperations{}}
}
// Operations returns a pointer to the creator's own operation list (not a
// copy), so changes made through it are visible to the creator.
func (cc *ContentCreator) Operations() *ContentStreamOperations {
	ops := &cc.operands
	return ops
}
// Bytes serializes the accumulated operations to their content stream byte
// representation, i.e. the kind that can be stored as a PDF stream or string.
func (cc *ContentCreator) Bytes() []byte {
	ops := &cc.operands
	return ops.Bytes()
}
// String is a convenience wrapper returning the output of Bytes as a string.
func (cc *ContentCreator) String() string {
	raw := cc.operands.Bytes()
	return string(raw)
}
/* Graphics state operators. */
// Add_q appends the "q" operator: save (push) the current graphics state on the stack.
func (cc *ContentCreator) Add_q() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "q"})
	return cc
}
// Add_Q appends the "Q" operator: restore (pop) the most recently saved graphics state.
func (cc *ContentCreator) Add_Q() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "Q"})
	return cc
}
// Add_Do appends the "Do" operator: display the XObject (image or form) called name.
func (cc *ContentCreator) Add_Do(name core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Do",
		Params:  makeParamsFromNames([]core.PdfObjectName{name}),
	})
	return cc
}
// Add_cm appends the "cm" operator: modify the current transformation matrix
// (ctm) with the six values a, b, c, d, e, f.
func (cc *ContentCreator) Add_cm(a, b, c, d, e, f float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "cm",
		Params:  makeParamsFromFloats([]float64{a, b, c, d, e, f}),
	})
	return cc
}
// Translate is a convenience wrapper that emits a cm operation translating the
// transformation matrix by (tx, ty).
func (cc *ContentCreator) Translate(tx, ty float64) *ContentCreator {
	// Identity scale/rotation part; only the offset varies.
	const a, b, c, d = 1, 0, 0, 1
	return cc.Add_cm(a, b, c, d, tx, ty)
}
// Scale is a convenience wrapper that emits a cm operation scaling the
// transformation matrix by sx horizontally and sy vertically.
func (cc *ContentCreator) Scale(sx, sy float64) *ContentCreator {
	// No shear and no translation.
	const b, c, e, f = 0, 0, 0, 0
	return cc.Add_cm(sx, b, c, sy, e, f)
}
// RotateDeg is a convenience wrapper that emits a cm operation rotating the
// transformation matrix by angle, given in degrees.
func (cc *ContentCreator) RotateDeg(angle float64) *ContentCreator {
	sin := math.Sin(angle * math.Pi / 180.0)
	cos := math.Cos(angle * math.Pi / 180.0)
	// Rotation matrix: [cos sin -sin cos 0 0].
	return cc.Add_cm(cos, sin, -sin, cos, 0, 0)
}
// Add_w appends the "w" operator: set the line width.
func (cc *ContentCreator) Add_w(lineWidth float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "w",
		Params:  makeParamsFromFloats([]float64{lineWidth}),
	})
	return cc
}
// Add_J appends the "J" operator: set the line cap style.
// The style is passed through as a name object.
// NOTE(review): the PDF specification defines the J operand as an integer;
// confirm that a name operand is what consumers expect.
func (cc *ContentCreator) Add_J(lineCapStyle string) *ContentCreator {
	style := core.PdfObjectName(lineCapStyle)
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "J",
		Params:  makeParamsFromNames([]core.PdfObjectName{style}),
	})
	return cc
}
// Add_j appends the "j" operator: set the line join style.
// The style is passed through as a name object.
// NOTE(review): the PDF specification defines the j operand as an integer;
// confirm that a name operand is what consumers expect.
func (cc *ContentCreator) Add_j(lineJoinStyle string) *ContentCreator {
	style := core.PdfObjectName(lineJoinStyle)
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "j",
		Params:  makeParamsFromNames([]core.PdfObjectName{style}),
	})
	return cc
}
// Add_M appends the "M" operator: set the miter limit.
func (cc *ContentCreator) Add_M(miterlimit float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "M",
		Params:  makeParamsFromFloats([]float64{miterlimit}),
	})
	return cc
}
// Add_d appends the "d" operator: set the line dash pattern from the dash
// array followed by the dash phase.
func (cc *ContentCreator) Add_d(dashArray []int64, dashPhase int64) *ContentCreator {
	params := []core.PdfObject{
		core.MakeArrayFromIntegers64(dashArray),
		core.MakeInteger(dashPhase),
	}
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "d", Params: params})
	return cc
}
// Add_ri appends the "ri" operator: set the color rendering intent.
func (cc *ContentCreator) Add_ri(intent core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "ri",
		Params:  makeParamsFromNames([]core.PdfObjectName{intent}),
	})
	return cc
}
// Add_i appends the "i" operator: set the flatness tolerance.
func (cc *ContentCreator) Add_i(flatness float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "i",
		Params:  makeParamsFromFloats([]float64{flatness}),
	})
	return cc
}
// Add_gs appends the "gs" operator: set the graphics state from the named dictionary.
func (cc *ContentCreator) Add_gs(dictName core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "gs",
		Params:  makeParamsFromNames([]core.PdfObjectName{dictName}),
	})
	return cc
}
/* Path construction operators. */
// Add_m appends the "m" operator: move the current point to (x,y).
func (cc *ContentCreator) Add_m(x, y float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "m",
		Params:  makeParamsFromFloats([]float64{x, y}),
	})
	return cc
}
// Add_l appends the "l" operator: add a straight line segment from the current
// point to (x,y).
func (cc *ContentCreator) Add_l(x, y float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "l",
		Params:  makeParamsFromFloats([]float64{x, y}),
	})
	return cc
}
// Add_c appends the "c" operator: add a Bezier curve from the current point to
// (x3,y3) using (x1,y1) and (x2,y2) as control points.
func (cc *ContentCreator) Add_c(x1, y1, x2, y2, x3, y3 float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "c",
		Params:  makeParamsFromFloats([]float64{x1, y1, x2, y2, x3, y3}),
	})
	return cc
}
// Add_v appends the "v" operator: add a Bezier curve from the current point to
// (x3,y3) using the current point and (x2,y2) as control points.
func (cc *ContentCreator) Add_v(x2, y2, x3, y3 float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "v",
		Params:  makeParamsFromFloats([]float64{x2, y2, x3, y3}),
	})
	return cc
}
// Add_y appends the "y" operator: add a Bezier curve from the current point to
// (x3,y3) using (x1,y1) and (x3,y3) as control points.
func (cc *ContentCreator) Add_y(x1, y1, x3, y3 float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "y",
		Params:  makeParamsFromFloats([]float64{x1, y1, x3, y3}),
	})
	return cc
}
// Add_h appends the "h" operator: close the current subpath with a line from
// the current position back to the starting position.
func (cc *ContentCreator) Add_h() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "h"})
	return cc
}
// Add_re appends the "re" operator: add a rectangle with lower left corner
// (x,y) and the given width and height to the current path as a complete subpath.
func (cc *ContentCreator) Add_re(x, y, width, height float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "re",
		Params:  makeParamsFromFloats([]float64{x, y, width, height}),
	})
	return cc
}
/* Path painting operators. */
// Add_S appends the "S" operator: stroke the path.
func (cc *ContentCreator) Add_S() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "S"})
	return cc
}
// Add_s appends the "s" operator: close and stroke the path.
func (cc *ContentCreator) Add_s() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "s"})
	return cc
}
// Add_f appends the "f" operator: fill the path, using the nonzero winding
// number rule to determine the fill region.
func (cc *ContentCreator) Add_f() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "f"})
	return cc
}
// Add_f_starred appends the "f*" operator: fill the path, using the even-odd
// rule to determine the fill region.
func (cc *ContentCreator) Add_f_starred() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "f*"})
	return cc
}
// Add_B appends the "B" operator: fill and then stroke the path (nonzero
// winding number rule).
func (cc *ContentCreator) Add_B() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "B"})
	return cc
}
// Add_B_starred appends the "B*" operator: fill and then stroke the path
// (even-odd rule).
func (cc *ContentCreator) Add_B_starred() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "B*"})
	return cc
}
// Add_b appends the "b" operator: close, fill and then stroke the path
// (nonzero winding number rule).
func (cc *ContentCreator) Add_b() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "b"})
	return cc
}
// Add_b_starred appends the "b*" operator: close, fill and then stroke the
// path (even-odd rule).
func (cc *ContentCreator) Add_b_starred() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "b*"})
	return cc
}
// Add_n appends the "n" operator: end the path without filling or stroking.
func (cc *ContentCreator) Add_n() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "n"})
	return cc
}
/* Clipping path operators. */
// Add_W appends the "W" operator: intersect the current clipping path with the
// current path (nonzero winding rule).
func (cc *ContentCreator) Add_W() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "W"})
	return cc
}
// Add_W_starred appends the "W*" operator: intersect the current clipping path
// with the current path (even-odd rule).
func (cc *ContentCreator) Add_W_starred() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "W*"})
	return cc
}
/* Color operators. */
// Add_CS appends the "CS" operator: set the current colorspace for stroking operations.
func (cc *ContentCreator) Add_CS(name core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "CS",
		Params:  makeParamsFromNames([]core.PdfObjectName{name}),
	})
	return cc
}
// Add_cs appends the "cs" operator: same as CS, but for non-stroking operations.
func (cc *ContentCreator) Add_cs(name core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "cs",
		Params:  makeParamsFromNames([]core.PdfObjectName{name}),
	})
	return cc
}
// Add_SC appends the "SC" operator: set the color for stroking operations from
// the components c1, ..., cn.
func (cc *ContentCreator) Add_SC(c ...float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "SC",
		Params:  makeParamsFromFloats(c),
	})
	return cc
}
// Add_SCN appends the "SCN" operator: same as SC, but supports more colorspaces.
func (cc *ContentCreator) Add_SCN(c ...float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "SCN",
		Params:  makeParamsFromFloats(c),
	})
	return cc
}
// Add_SCN_pattern appends the "SCN" operator with a trailing name operand (for
// patterns). Syntax: c1 ... cn name SCN.
func (cc *ContentCreator) Add_SCN_pattern(name core.PdfObjectName, c ...float64) *ContentCreator {
	params := append(makeParamsFromFloats(c), core.MakeName(string(name)))
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "SCN", Params: params})
	return cc
}
// Add_scn appends the "scn" operator: set the color for nonstroking operations
// from the components c1, ..., cn.
func (cc *ContentCreator) Add_scn(c ...float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "scn",
		Params:  makeParamsFromFloats(c),
	})
	return cc
}
// Add_scn_pattern appends the "scn" operator with a trailing name operand (for
// patterns). Syntax: c1 ... cn name scn.
func (cc *ContentCreator) Add_scn_pattern(name core.PdfObjectName, c ...float64) *ContentCreator {
	params := append(makeParamsFromFloats(c), core.MakeName(string(name)))
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "scn", Params: params})
	return cc
}
// Add_G appends the "G" operator: set the stroking colorspace to DeviceGray
// and set the gray level (0-1).
func (cc *ContentCreator) Add_G(gray float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "G",
		Params:  makeParamsFromFloats([]float64{gray}),
	})
	return cc
}
// Add_g appends the "g" operator: same as G, but for nonstroking operations.
func (cc *ContentCreator) Add_g(gray float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "g",
		Params:  makeParamsFromFloats([]float64{gray}),
	})
	return cc
}
// Add_RG appends the "RG" operator: set the stroking colorspace to DeviceRGB
// and set the r, g, b components (0-1 each).
func (cc *ContentCreator) Add_RG(r, g, b float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "RG",
		Params:  makeParamsFromFloats([]float64{r, g, b}),
	})
	return cc
}
// Add_rg appends the "rg" operator: same as RG, but for nonstroking operations.
func (cc *ContentCreator) Add_rg(r, g, b float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "rg",
		Params:  makeParamsFromFloats([]float64{r, g, b}),
	})
	return cc
}
// Add_K appends the "K" operator: set the stroking colorspace to DeviceCMYK
// and set the c, m, y, k components (0-1 each).
func (cc *ContentCreator) Add_K(c, m, y, k float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "K",
		Params:  makeParamsFromFloats([]float64{c, m, y, k}),
	})
	return cc
}
// Add_k appends the "k" operator: same as K, but for nonstroking operations.
func (cc *ContentCreator) Add_k(c, m, y, k float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "k",
		Params:  makeParamsFromFloats([]float64{c, m, y, k}),
	})
	return cc
}
/* Shading operators. */
// Add_sh appends the "sh" shading operator with name as its single operand.
func (cc *ContentCreator) Add_sh(name core.PdfObjectName) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "sh",
		Params:  makeParamsFromNames([]core.PdfObjectName{name}),
	})
	return cc
}
/* Text related operators */
/* Text state operators */
// Add_BT appends the "BT" operator: begin text.
func (cc *ContentCreator) Add_BT() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "BT"})
	return cc
}
// Add_ET appends the "ET" operator: end text.
func (cc *ContentCreator) Add_ET() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "ET"})
	return cc
}
// Add_Tc appends the "Tc" operator: set character spacing.
func (cc *ContentCreator) Add_Tc(charSpace float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tc",
		Params:  makeParamsFromFloats([]float64{charSpace}),
	})
	return cc
}
// Add_Tw appends the "Tw" operator: set word spacing.
func (cc *ContentCreator) Add_Tw(wordSpace float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tw",
		Params:  makeParamsFromFloats([]float64{wordSpace}),
	})
	return cc
}
// Add_Tz appends the "Tz" operator: set horizontal scaling.
func (cc *ContentCreator) Add_Tz(scale float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tz",
		Params:  makeParamsFromFloats([]float64{scale}),
	})
	return cc
}
// Add_TL appends the "TL" operator: set leading.
func (cc *ContentCreator) Add_TL(leading float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "TL",
		Params:  makeParamsFromFloats([]float64{leading}),
	})
	return cc
}
// Add_Tf appends the "Tf" operator: set the font (by name) and the font size.
func (cc *ContentCreator) Add_Tf(fontName core.PdfObjectName, fontSize float64) *ContentCreator {
	// Operand order is: font name, then size.
	params := append(
		makeParamsFromNames([]core.PdfObjectName{fontName}),
		makeParamsFromFloats([]float64{fontSize})...,
	)
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "Tf", Params: params})
	return cc
}
// Add_Tr appends the "Tr" operator: set the text rendering mode.
func (cc *ContentCreator) Add_Tr(render int64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tr",
		Params:  makeParamsFromInts([]int64{render}),
	})
	return cc
}
// Add_Ts appends the "Ts" operator: set text rise.
func (cc *ContentCreator) Add_Ts(rise float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Ts",
		Params:  makeParamsFromFloats([]float64{rise}),
	})
	return cc
}
/* Text positioning operators. */
// Add_Td appends the "Td" operator: move to the start of the next line with
// offset (tx, ty).
func (cc *ContentCreator) Add_Td(tx, ty float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Td",
		Params:  makeParamsFromFloats([]float64{tx, ty}),
	})
	return cc
}
// Add_TD appends the "TD" operator: move to the start of the next line with
// offset (tx, ty).
func (cc *ContentCreator) Add_TD(tx, ty float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "TD",
		Params:  makeParamsFromFloats([]float64{tx, ty}),
	})
	return cc
}
// Add_Tm appends the "Tm" operator: set the text line matrix from the six
// values a, b, c, d, e, f.
func (cc *ContentCreator) Add_Tm(a, b, c, d, e, f float64) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tm",
		Params:  makeParamsFromFloats([]float64{a, b, c, d, e, f}),
	})
	return cc
}
// Add_Tstar appends the "T*" operator: move to the start of the next line.
func (cc *ContentCreator) Add_Tstar() *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{Operand: "T*"})
	return cc
}
/* Text showing operators */
// Add_Tj appends the "Tj" operator: show a text string.
func (cc *ContentCreator) Add_Tj(textstr core.PdfObjectString) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "Tj",
		Params:  makeParamsFromStrings([]core.PdfObjectString{textstr}),
	})
	return cc
}
// Add_quote appends the "'" operator: move to the next line and show a string.
func (cc *ContentCreator) Add_quote(textstr core.PdfObjectString) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "'",
		Params:  makeParamsFromStrings([]core.PdfObjectString{textstr}),
	})
	return cc
}
// Add_quotes appends the `"` (double quote) operator: move to the next line
// and show a string, using aw and ac as word and character spacing
// respectively. Operands are written in the order: aw ac string.
//
// The operator keyword is a single double-quote character; two apostrophes
// would be read by a PDF consumer as two separate "'" operators.
func (cc *ContentCreator) Add_quotes(textstr core.PdfObjectString, aw, ac float64) *ContentCreator {
	op := ContentStreamOperation{}
	op.Operand = `"`
	op.Params = makeParamsFromFloats([]float64{aw, ac})
	op.Params = append(op.Params, makeParamsFromStrings([]core.PdfObjectString{textstr})...)
	cc.operands = append(cc.operands, &op)
	return cc
}
// Add_TJ appends the "TJ" operator: show one or more text strings. vals are
// the numbers (displacements) and strings that make up the single array operand.
func (cc *ContentCreator) Add_TJ(vals ...core.PdfObject) *ContentCreator {
	cc.operands = append(cc.operands, &ContentStreamOperation{
		Operand: "TJ",
		Params:  []core.PdfObject{core.MakeArray(vals...)},
	})
	return cc
}

View File

@@ -0,0 +1,149 @@
package draw
import (
"math"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)
// CubicBezierCurve is a cubic bezier curve, defined by:
// R(t) = P0*(1-t)^3 + P1*3*t*(1-t)^2 + P2*3*t^2*(1-t) + P3*t^3
//
// P0 is the starting (current) point, P1 and P2 are the two control points and
// P3 is the final point.
type CubicBezierCurve struct {
	P0, P1, P2, P3 Point
}
// NewCubicBezierCurve builds a curve starting at (x0,y0), with control points
// (x1,y1) and (x2,y2), ending at (x3,y3).
func NewCubicBezierCurve(x0, y0, x1, y1, x2, y2, x3, y3 float64) CubicBezierCurve {
	return CubicBezierCurve{
		P0: NewPoint(x0, y0),
		P1: NewPoint(x1, y1),
		P2: NewPoint(x2, y2),
		P3: NewPoint(x3, y3),
	}
}
// AddOffsetXY returns a copy of the curve with (offX, offY) added to all four
// of its points. The receiver is passed by value and is not modified.
func (curve CubicBezierCurve) AddOffsetXY(offX, offY float64) CubicBezierCurve {
	for _, pt := range []*Point{&curve.P0, &curve.P1, &curve.P2, &curve.P3} {
		pt.X += offX
		pt.Y += offY
	}
	return curve
}
// GetBounds returns an approximate bounding rectangle of the curve, obtained
// by evaluating the curve at evenly spaced parameter values t in [0, 1] and
// taking the extremes of the sampled coordinates.
//
// The parameter is derived from an integer step counter rather than by
// repeatedly adding 0.001 to a float: the accumulated rounding error of the
// float loop could push t past 1.0 before the last iteration, so the end
// point (t = 1) was not guaranteed to be sampled.
func (curve CubicBezierCurve) GetBounds() model.PdfRectangle {
	minX, maxX := curve.P0.X, curve.P0.X
	minY, maxY := curve.P0.Y, curve.P0.Y

	// 1000 intervals, i.e. 1001 samples including both end points.
	const steps = 1000
	for i := 0; i <= steps; i++ {
		t := float64(i) / steps

		rx := curve.P0.X*math.Pow(1-t, 3) +
			curve.P1.X*3*t*math.Pow(1-t, 2) +
			curve.P2.X*3*math.Pow(t, 2)*(1-t) +
			curve.P3.X*math.Pow(t, 3)
		ry := curve.P0.Y*math.Pow(1-t, 3) +
			curve.P1.Y*3*t*math.Pow(1-t, 2) +
			curve.P2.Y*3*math.Pow(t, 2)*(1-t) +
			curve.P3.Y*math.Pow(t, 3)

		if rx < minX {
			minX = rx
		}
		if rx > maxX {
			maxX = rx
		}
		if ry < minY {
			minY = ry
		}
		if ry > maxY {
			maxY = ry
		}
	}

	bounds := model.PdfRectangle{}
	bounds.Llx = minX
	bounds.Lly = minY
	bounds.Urx = maxX
	bounds.Ury = maxY
	return bounds
}
// CubicBezierPath is an ordered sequence of cubic bezier curves.
type CubicBezierPath struct {
Curves []CubicBezierCurve
}
// NewCubicBezierPath returns a path with an empty (non-nil) curve list.
func NewCubicBezierPath() CubicBezierPath {
	return CubicBezierPath{Curves: []CubicBezierCurve{}}
}
// AppendCurve returns the path with curve appended. The receiver is passed by
// value, so callers must use the returned path.
func (bpath CubicBezierPath) AppendCurve(curve CubicBezierCurve) CubicBezierPath {
	bpath.Curves = append(bpath.Curves, curve)
	return bpath
}
// Copy returns a new path whose curve list is an independent copy of the
// receiver's curves.
func (bpath CubicBezierPath) Copy() CubicBezierPath {
	// Appending to an empty literal keeps the result non-nil even for an empty path.
	return CubicBezierPath{Curves: append([]CubicBezierCurve{}, bpath.Curves...)}
}
// Offset shifts every curve of the path by (offX, offY) and returns the path.
// The curves are overwritten in place, so the receiver's curve slice (which
// shares its backing array with the returned path) is modified as well.
func (bpath CubicBezierPath) Offset(offX, offY float64) CubicBezierPath {
	for i := range bpath.Curves {
		bpath.Curves[i] = bpath.Curves[i].AddOffsetXY(offX, offY)
	}
	return bpath
}
// GetBoundingBox returns the rectangle enclosing the bounds of all curves in
// the path. Only X, Y, Width and Height of the result are set. An empty path
// yields an all-zero rectangle.
func (bpath CubicBezierPath) GetBoundingBox() Rectangle {
	var minX, maxX, minY, maxY float64

	for i := range bpath.Curves {
		b := bpath.Curves[i].GetBounds()
		if i == 0 {
			// The first curve seeds the extremes.
			minX, maxX = b.Llx, b.Urx
			minY, maxY = b.Lly, b.Ury
			continue
		}
		if b.Llx < minX {
			minX = b.Llx
		}
		if b.Urx > maxX {
			maxX = b.Urx
		}
		if b.Lly < minY {
			minY = b.Lly
		}
		if b.Ury > maxY {
			maxY = b.Ury
		}
	}

	return Rectangle{
		X:      minX,
		Y:      minY,
		Width:  maxX - minX,
		Height: maxY - minY,
	}
}

View File

@@ -0,0 +1,98 @@
package draw
// Path consists of straight line connections between each point defined in an
// array of points.
type Path struct {
Points []Point
}
// NewPath returns a path with an empty (non-nil) point list.
func NewPath() Path {
	return Path{Points: []Point{}}
}
// AppendPoint returns the path with point appended. The receiver is passed by
// value, so callers must use the returned path.
func (path Path) AppendPoint(point Point) Path {
	path.Points = append(path.Points, point)
	return path
}
// RemovePoint returns the path without point number `number` (1-based). If
// number is out of range the path is returned unchanged.
//
// The remaining points are copied into a fresh slice. Removing with an
// in-place append would shift elements inside the backing array that is still
// shared with the caller's original Path, leaving that path with a duplicated
// last point.
func (path Path) RemovePoint(number int) Path {
	if number < 1 || number > len(path.Points) {
		return path
	}

	idx := number - 1
	remaining := make([]Point, 0, len(path.Points)-1)
	remaining = append(remaining, path.Points[:idx]...)
	remaining = append(remaining, path.Points[idx+1:]...)
	path.Points = remaining
	return path
}
// Length returns the number of points in the path.
func (path Path) Length() int {
	count := len(path.Points)
	return count
}
// GetPointNumber returns point number `number` (1-based). A zero Point is
// returned when number is out of range.
func (path Path) GetPointNumber(number int) Point {
	if number >= 1 && number <= len(path.Points) {
		return path.Points[number-1]
	}
	return Point{}
}
// Copy returns a new path whose point list is an independent copy of the
// receiver's points.
func (path Path) Copy() Path {
	// Appending to an empty literal keeps the result non-nil even for an empty path.
	return Path{Points: append([]Point{}, path.Points...)}
}
// Offset shifts every point of the path by (offX, offY) and returns the path.
// The points are overwritten in place, so the receiver's point slice (which
// shares its backing array with the returned path) is modified as well.
func (path Path) Offset(offX, offY float64) Path {
	for i := range path.Points {
		path.Points[i] = path.Points[i].Add(offX, offY)
	}
	return path
}
// GetBoundingBox returns the smallest axis-aligned box containing all points
// of the path. An empty path yields an all-zero box.
func (path Path) GetBoundingBox() BoundingBox {
	var minX, maxX, minY, maxY float64

	for i := range path.Points {
		pt := path.Points[i]
		if i == 0 {
			// The first point seeds the extremes.
			minX, maxX = pt.X, pt.X
			minY, maxY = pt.Y, pt.Y
			continue
		}
		if pt.X < minX {
			minX = pt.X
		}
		if pt.X > maxX {
			maxX = pt.X
		}
		if pt.Y < minY {
			minY = pt.Y
		}
		if pt.Y > maxY {
			maxY = pt.Y
		}
	}

	return BoundingBox{
		X:      minX,
		Y:      minY,
		Width:  maxX - minX,
		Height: maxY - minY,
	}
}
// BoundingBox is an axis-aligned box given by its corner (X, Y) and its
// Width and Height.
type BoundingBox struct {
	X, Y, Width, Height float64
}

View File

@@ -0,0 +1,32 @@
package draw
import "fmt"
// Point is a position given by its X and Y coordinates.
type Point struct {
	X, Y float64
}
// NewPoint returns the point (x, y).
func NewPoint(x, y float64) Point {
	return Point{X: x, Y: y}
}
// Add returns the point shifted by (dx, dy). The receiver is not modified.
func (p Point) Add(dx, dy float64) Point {
	return Point{X: p.X + dx, Y: p.Y + dy}
}
// AddVector returns the point shifted by the vector's Dx and Dy components.
// The receiver is not modified.
func (p Point) AddVector(v Vector) Point {
	return Point{X: p.X + v.Dx, Y: p.Y + v.Dy}
}
// String formats the point as "(X,Y)" with one decimal per coordinate.
func (p Point) String() string {
	const layout = "(%.1f,%.1f)"
	return fmt.Sprintf(layout, p.X, p.Y)
}

View File

@@ -0,0 +1,353 @@
package draw
import (
"math"
pdfcontent "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/contentstream"
pdfcore "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
pdf "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)
// Circle describes a circle/ellipse to draw, positioned at (X,Y) with the
// given Width and Height, with an optional fill and an optional border.
type Circle struct {
	X, Y, Width, Height float64

	// Fill: only painted when FillEnabled is set.
	FillEnabled bool
	FillColor   *pdf.PdfColorDeviceRGB

	// Border: only painted when BorderEnabled is set.
	BorderEnabled bool
	BorderWidth   float64
	BorderColor   *pdf.PdfColorDeviceRGB

	// Opacity is the alpha value (0-1).
	Opacity float64
}
// Draw generates the content stream for the circle. A graphics state name
// (gsName) can be given for setting opacity etc.; leave it empty ("") otherwise.
// Returns the content stream as a byte array, the bounding box and an error on
// failure (the error is currently always nil).
//
// Any non-empty gsName is applied. Previously names of a single character were
// silently ignored because the check required a length greater than one.
//
// NOTE(review): FillColor / BorderColor are used whenever FillEnabled /
// BorderEnabled is set; presumably they must be non-nil in that case.
func (c Circle) Draw(gsName string) ([]byte, *pdf.PdfRectangle, error) {
	xRad := c.Width / 2
	yRad := c.Height / 2
	if c.BorderEnabled {
		// Keep the stroke inside the requested Width/Height.
		xRad -= c.BorderWidth / 2
		yRad -= c.BorderWidth / 2
	}

	// Control point distance factor for approximating a quarter circle with a
	// cubic bezier curve.
	magic := 0.551784
	xMagic := xRad * magic
	yMagic := yRad * magic

	// Four quarter arcs around the origin.
	bpath := NewCubicBezierPath()
	bpath = bpath.AppendCurve(NewCubicBezierCurve(-xRad, 0, -xRad, yMagic, -xMagic, yRad, 0, yRad))
	bpath = bpath.AppendCurve(NewCubicBezierCurve(0, yRad, xMagic, yRad, xRad, yMagic, xRad, 0))
	bpath = bpath.AppendCurve(NewCubicBezierCurve(xRad, 0, xRad, -yMagic, xMagic, -yRad, 0, -yRad))
	bpath = bpath.AppendCurve(NewCubicBezierCurve(0, -yRad, -xMagic, -yRad, -xRad, -yMagic, -xRad, 0))
	bpath = bpath.Offset(xRad, yRad)
	if c.BorderEnabled {
		bpath = bpath.Offset(c.BorderWidth/2, c.BorderWidth/2)
	}
	if c.X != 0 || c.Y != 0 {
		bpath = bpath.Offset(c.X, c.Y)
	}

	creator := pdfcontent.NewContentCreator()
	creator.Add_q()
	if c.FillEnabled {
		creator.Add_rg(c.FillColor.R(), c.FillColor.G(), c.FillColor.B())
	}
	if c.BorderEnabled {
		creator.Add_RG(c.BorderColor.R(), c.BorderColor.G(), c.BorderColor.B())
		creator.Add_w(c.BorderWidth)
	}
	if gsName != "" {
		// If a graphics state is provided, use it. (Used for transparency settings here).
		creator.Add_gs(pdfcore.PdfObjectName(gsName))
	}

	DrawBezierPathWithCreator(bpath, creator)
	creator.Add_h() // Close the path.

	switch {
	case c.FillEnabled && c.BorderEnabled:
		creator.Add_B() // Fill and stroke.
	case c.FillEnabled:
		creator.Add_f() // Fill.
	case c.BorderEnabled:
		creator.Add_S() // Stroke.
	}
	creator.Add_Q()

	// Get bounding box.
	pathBbox := bpath.GetBoundingBox()
	if c.BorderEnabled {
		// Account for stroke width.
		pathBbox.Height += c.BorderWidth
		pathBbox.Width += c.BorderWidth
		pathBbox.X -= c.BorderWidth / 2
		pathBbox.Y -= c.BorderWidth / 2
	}

	// Bounding box - global coordinate system.
	bbox := &pdf.PdfRectangle{}
	bbox.Llx = pathBbox.X
	bbox.Lly = pathBbox.Y
	bbox.Urx = pathBbox.X + pathBbox.Width
	bbox.Ury = pathBbox.Y + pathBbox.Height

	return creator.Bytes(), bbox, nil
}
// Rectangle is defined by a Width and Height and a lower left corner at (X,Y).
// It can optionally have a border and a filling color. The Width/Height
// includes the border (if any specified), i.e. the border is positioned inside.
type Rectangle struct {
	X, Y, Width, Height float64

	// Fill: only painted when FillEnabled is set.
	FillEnabled bool
	FillColor   *pdf.PdfColorDeviceRGB

	// Border: only painted when BorderEnabled is set.
	BorderEnabled bool
	BorderWidth   float64
	BorderColor   *pdf.PdfColorDeviceRGB

	// Opacity is the alpha value (0-1).
	Opacity float64
}
// Draw generates the content stream for the rectangle. A graphics state name
// (gsName) can be given for setting opacity etc.; leave it empty ("") otherwise.
// Returns the content stream as a byte array, the bounding box and an error on
// failure (the error is currently always nil).
//
// Any non-empty gsName is applied. Previously names of a single character were
// silently ignored because the check required a length greater than one.
//
// NOTE(review): FillColor / BorderColor are used whenever FillEnabled /
// BorderEnabled is set; presumably they must be non-nil in that case.
func (rect Rectangle) Draw(gsName string) ([]byte, *pdf.PdfRectangle, error) {
	// Outline the rectangle at the origin, then move it to (X,Y).
	path := NewPath()
	path = path.AppendPoint(NewPoint(0, 0))
	path = path.AppendPoint(NewPoint(0, rect.Height))
	path = path.AppendPoint(NewPoint(rect.Width, rect.Height))
	path = path.AppendPoint(NewPoint(rect.Width, 0))
	path = path.AppendPoint(NewPoint(0, 0))
	if rect.X != 0 || rect.Y != 0 {
		path = path.Offset(rect.X, rect.Y)
	}

	creator := pdfcontent.NewContentCreator()
	creator.Add_q()
	if rect.FillEnabled {
		creator.Add_rg(rect.FillColor.R(), rect.FillColor.G(), rect.FillColor.B())
	}
	if rect.BorderEnabled {
		creator.Add_RG(rect.BorderColor.R(), rect.BorderColor.G(), rect.BorderColor.B())
		creator.Add_w(rect.BorderWidth)
	}
	if gsName != "" {
		// If a graphics state is provided, use it. (Used for transparency settings here).
		creator.Add_gs(pdfcore.PdfObjectName(gsName))
	}

	DrawPathWithCreator(path, creator)
	creator.Add_h() // Close the path.

	switch {
	case rect.FillEnabled && rect.BorderEnabled:
		creator.Add_B() // Fill and stroke.
	case rect.FillEnabled:
		creator.Add_f() // Fill.
	case rect.BorderEnabled:
		creator.Add_S() // Stroke.
	}
	creator.Add_Q()

	// Get bounding box.
	pathBbox := path.GetBoundingBox()

	// Bounding box - global coordinate system.
	bbox := &pdf.PdfRectangle{}
	bbox.Llx = pathBbox.X
	bbox.Lly = pathBbox.Y
	bbox.Urx = pathBbox.X + pathBbox.Width
	bbox.Ury = pathBbox.Y + pathBbox.Height

	return creator.Bytes(), bbox, nil
}
// LineEndingStyle identifies how an end point of a line is rendered.
// The currently supported line ending styles are None, Arrow (ClosedArrow) and Butt.
type LineEndingStyle int

// Line ending style values; the numeric values are 0, 1 and 2 respectively.
const (
	LineEndingStyleNone LineEndingStyle = iota
	LineEndingStyleArrow
	LineEndingStyleButt
)
// Line defines a line between point 1 (X1,Y1) and point 2 (X2,Y2). The line ending styles can be none (regular line),
// or arrows at either end. The line also has a specified width, color and opacity.
type Line struct {
X1 float64
Y1 float64
X2 float64
Y2 float64
LineColor *pdf.PdfColorDeviceRGB // Dereferenced unconditionally by Draw, so must be non-nil there.
Opacity float64 // Alpha value (0-1). Not read by Draw; transparency is applied via the gsName argument.
LineWidth float64
LineEndingStyle1 LineEndingStyle // Line ending style of point 1.
LineEndingStyle2 LineEndingStyle // Line ending style of point 2.
}
// Draw draws a line in PDF. Generates the content stream which can be used in page contents or appearance
// stream of annotation. The line is built as a polygon outline (optionally extended with arrow heads) and
// painted with the fill operator, so LineColor is set as the non-stroking color.
//
// gsName optionally names a graphics state to apply (used for transparency settings); pass "" for none.
// LineColor must be non-nil.
//
// Returns the stream content, the bounding box of the outline (after offsetting by X1,Y1) and an error,
// which is currently always nil.
func (line Line) Draw(gsName string) ([]byte, *pdf.PdfRectangle, error) {
	x1, x2 := line.X1, line.X2
	y1, y2 := line.Y1, line.Y2

	dy := y2 - y1
	dx := x2 - x1
	theta := math.Atan2(dy, dx)

	L := math.Sqrt(math.Pow(dx, 2.0) + math.Pow(dy, 2.0))
	w := line.LineWidth

	pi := math.Pi

	// Sign factor: flipped once for each negative delta component.
	mul := 1.0
	if dx < 0 {
		mul *= -1.0
	}
	if dy < 0 {
		mul *= -1.0
	}

	// Vs.
	VsX := mul * (-w / 2 * math.Cos(theta+pi/2))
	VsY := mul * (-w/2*math.Sin(theta+pi/2) + w*math.Sin(theta+pi/2))

	// V1.
	V1X := VsX + w/2*math.Cos(theta+pi/2)
	V1Y := VsY + w/2*math.Sin(theta+pi/2)

	// V2.
	V2X := VsX + w/2*math.Cos(theta+pi/2) + L*math.Cos(theta)
	V2Y := VsY + w/2*math.Sin(theta+pi/2) + L*math.Sin(theta)

	// V3.
	V3X := VsX + w/2*math.Cos(theta+pi/2) + L*math.Cos(theta) + w*math.Cos(theta-pi/2)
	V3Y := VsY + w/2*math.Sin(theta+pi/2) + L*math.Sin(theta) + w*math.Sin(theta-pi/2)

	// V4.
	V4X := VsX + w/2*math.Cos(theta-pi/2)
	V4Y := VsY + w/2*math.Sin(theta-pi/2)

	// Plain line body: a quadrilateral of width w and length L.
	path := NewPath()
	path = path.AppendPoint(NewPoint(V1X, V1Y))
	path = path.AppendPoint(NewPoint(V2X, V2Y))
	path = path.AppendPoint(NewPoint(V3X, V3Y))
	path = path.AppendPoint(NewPoint(V4X, V4Y))

	lineEnding1 := line.LineEndingStyle1
	lineEnding2 := line.LineEndingStyle2

	// TODO: Allow custom height/widths.
	arrowHeight := 3 * w
	arrowWidth := 3 * w
	arrowExtruding := (arrowWidth - w) / 2

	if lineEnding2 == LineEndingStyleArrow {
		// Convert P2, P3
		p2 := path.GetPointNumber(2)

		va1 := NewVectorPolar(arrowHeight, theta+pi)
		pa1 := p2.AddVector(va1)

		bVec := NewVectorPolar(arrowWidth/2, theta+pi/2)
		aVec := NewVectorPolar(arrowHeight, theta)

		va2 := NewVectorPolar(arrowExtruding, theta+pi/2)
		pa2 := pa1.AddVector(va2)

		va3 := aVec.Add(bVec.Flip())
		pa3 := pa2.AddVector(va3)

		va4 := bVec.Scale(2).Flip().Add(va3.Flip())
		pa4 := pa3.AddVector(va4)

		pa5 := pa1.AddVector(NewVectorPolar(w, theta-pi/2))

		newpath := NewPath()
		newpath = newpath.AppendPoint(path.GetPointNumber(1))
		newpath = newpath.AppendPoint(pa1)
		newpath = newpath.AppendPoint(pa2)
		newpath = newpath.AppendPoint(pa3)
		newpath = newpath.AppendPoint(pa4)
		newpath = newpath.AppendPoint(pa5)
		newpath = newpath.AppendPoint(path.GetPointNumber(4))

		path = newpath
	}
	if lineEnding1 == LineEndingStyleArrow {
		// Get the first and last points.
		p1 := path.GetPointNumber(1)
		pn := path.GetPointNumber(path.Length())

		// First three points on arrow.
		v1 := NewVectorPolar(w/2, theta+pi+pi/2)
		pa1 := p1.AddVector(v1)

		v2 := NewVectorPolar(arrowHeight, theta).Add(NewVectorPolar(arrowWidth/2, theta+pi/2))
		pa2 := pa1.AddVector(v2)

		v3 := NewVectorPolar(arrowExtruding, theta-pi/2)
		pa3 := pa2.AddVector(v3)

		// Last three points
		v5 := NewVectorPolar(arrowHeight, theta)
		pa5 := pn.AddVector(v5)

		v6 := NewVectorPolar(arrowExtruding, theta+pi+pi/2)
		pa6 := pa5.AddVector(v6)

		pa7 := pa1

		newpath := NewPath()
		newpath = newpath.AppendPoint(pa1)
		newpath = newpath.AppendPoint(pa2)
		newpath = newpath.AppendPoint(pa3)
		// Keep the interior points of the existing outline; its first and last points are replaced.
		for _, p := range path.Points[1 : len(path.Points)-1] {
			newpath = newpath.AppendPoint(p)
		}
		newpath = newpath.AppendPoint(pa5)
		newpath = newpath.AppendPoint(pa6)
		newpath = newpath.AppendPoint(pa7)

		path = newpath
	}

	creator := pdfcontent.NewContentCreator()

	// Draw line with arrow
	creator.
		Add_q().
		Add_rg(line.LineColor.R(), line.LineColor.G(), line.LineColor.B())
	// Any non-empty name is a valid graphics state name, including one-character names.
	if gsName != "" {
		// If a graphics state is provided, use it. (Used for transparency settings here).
		creator.Add_gs(pdfcore.PdfObjectName(gsName))
	}

	path = path.Offset(line.X1, line.Y1)

	pathBbox := path.GetBoundingBox()

	DrawPathWithCreator(path, creator)
	creator.Add_f().Add_Q()

	// Bounding box - global coordinate system.
	bbox := &pdf.PdfRectangle{}
	bbox.Llx = pathBbox.X
	bbox.Lly = pathBbox.Y
	bbox.Urx = pathBbox.X + pathBbox.Width
	bbox.Ury = pathBbox.Y + pathBbox.Height

	return creator.Bytes(), bbox, nil
}

View File

@@ -0,0 +1,28 @@
package draw
import (
pdfcontent "gitea.tecamino.com/paadi/pdfmerge/internal/pdf/contentstream"
)
// DrawPathWithCreator emits the PDF path construction commands for path into creator:
// a move-to for the first point followed by a line-to for every subsequent point.
// An empty path emits nothing.
func DrawPathWithCreator(path Path, creator *pdfcontent.ContentCreator) {
	if len(path.Points) == 0 {
		return
	}

	start := path.Points[0]
	creator.Add_m(start.X, start.Y)

	for _, pt := range path.Points[1:] {
		creator.Add_l(pt.X, pt.Y)
	}
}
// DrawBezierPathWithCreator emits the PDF path construction commands for bpath into creator:
// a move-to for the start point (P0) of the first curve, then one curve-to (P1, P2, P3) per curve.
// An empty path emits nothing.
func DrawBezierPathWithCreator(bpath CubicBezierPath, creator *pdfcontent.ContentCreator) {
	if len(bpath.Curves) > 0 {
		origin := bpath.Curves[0].P0
		creator.Add_m(origin.X, origin.Y)
	}

	for _, seg := range bpath.Curves {
		creator.Add_c(seg.P1.X, seg.P1.Y, seg.P2.X, seg.P2.Y, seg.P3.X, seg.P3.Y)
	}
}

View File

@@ -0,0 +1,80 @@
package draw
import "math"
// Vector is a two-dimensional displacement with Cartesian components Dx and Dy.
type Vector struct {
	Dx float64
	Dy float64
}

// NewVector returns the vector with components (dx, dy).
func NewVector(dx, dy float64) Vector {
	return Vector{Dx: dx, Dy: dy}
}

// NewVectorBetween returns the vector pointing from a to b.
func NewVectorBetween(a Point, b Point) Vector {
	return Vector{Dx: b.X - a.X, Dy: b.Y - a.Y}
}

// NewVectorPolar returns the vector with the given length and polar angle theta (radians).
func NewVectorPolar(length float64, theta float64) Vector {
	return Vector{
		Dx: length * math.Cos(theta),
		Dy: length * math.Sin(theta),
	}
}

// Add returns the component-wise sum of v and other. The receiver is not modified.
func (v Vector) Add(other Vector) Vector {
	v.Dx += other.Dx
	v.Dy += other.Dy
	return v
}

// Rotate returns v rotated by phi radians, keeping its magnitude.
func (v Vector) Rotate(phi float64) Vector {
	return NewVectorPolar(v.Magnitude(), v.GetPolarAngle()+phi)
}

// Flip changes the sign of the vector: -vector.
// The components are negated directly, so the result is exact (no trigonometric rounding error).
func (v Vector) Flip() Vector {
	v.Dx = -v.Dx
	v.Dy = -v.Dy
	return v
}

// FlipY returns v with the sign of Dy inverted.
func (v Vector) FlipY() Vector {
	v.Dy = -v.Dy
	return v
}

// FlipX returns v with the sign of Dx inverted.
func (v Vector) FlipX() Vector {
	v.Dx = -v.Dx
	return v
}

// Scale returns v with both components multiplied by factor.
// The components are scaled directly rather than via a polar round trip, avoiding rounding error.
func (v Vector) Scale(factor float64) Vector {
	v.Dx *= factor
	v.Dy *= factor
	return v
}

// Magnitude returns the Euclidean length of v.
// math.Hypot is used so that large components do not overflow when squared.
func (v Vector) Magnitude() float64 {
	return math.Hypot(v.Dx, v.Dy)
}

// GetPolarAngle returns the angle of v in radians, as given by math.Atan2(Dy, Dx).
func (v Vector) GetPolarAngle() float64 {
	return math.Atan2(v.Dy, v.Dx)
}

View File

@@ -0,0 +1,390 @@
package contentstream
import (
"bytes"
"errors"
"fmt"
gocolor "image/color"
"image/jpeg"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
)
// newEncoderFromInlineImage creates the encoder for the inline image's Filter and DecodeParms.
//
// A missing Filter or an empty filter array yields a raw (pass-through) encoder. A filter array with
// more than one element is delegated to newMultiEncoderFromInlineImage. Filter names may be given
// either fully spelled out or in their inline image abbreviation.
func newEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (core.StreamEncoder, error) {
	if inlineImage.Filter == nil {
		// No filter, return raw data back.
		return core.NewRawEncoder(), nil
	}

	// The filter should be a name or an array with a list of filter names.
	var filterName *core.PdfObjectName
	switch filter := inlineImage.Filter.(type) {
	case *core.PdfObjectName:
		filterName = filter
	case *core.PdfObjectArray:
		switch len(*filter) {
		case 0:
			// Empty array -> indicates raw filter (no filter).
			return core.NewRawEncoder(), nil
		case 1:
			// Single element: treat like a plain name.
			single, isName := (*filter)[0].(*core.PdfObjectName)
			if !isName {
				return nil, fmt.Errorf("filter array member not a Name object")
			}
			filterName = single
		default:
			menc, err := newMultiEncoderFromInlineImage(inlineImage)
			if err != nil {
				common.Log.Error("failed creating multi encoder: %v", err)
				return nil, err
			}
			common.Log.Trace("Multi enc: %s\n", menc)
			return menc, nil
		}
	default:
		return nil, fmt.Errorf("filter not a Name or Array object")
	}

	// From Table 94 p. 224 (PDF32000_2008):
	// Additional Abbreviations in an Inline Image Object:
	switch *filterName {
	case "AHx", "ASCIIHexDecode":
		return core.NewASCIIHexEncoder(), nil
	case "A85", "ASCII85Decode":
		return core.NewASCII85Encoder(), nil
	case "DCT", "DCTDecode":
		return newDCTEncoderFromInlineImage(inlineImage)
	case "Fl", "FlateDecode":
		return newFlateEncoderFromInlineImage(inlineImage, nil)
	case "LZW", "LZWDecode":
		return newLZWEncoderFromInlineImage(inlineImage, nil)
	case "CCF", "CCITTFaxDecode":
		return core.NewCCITTFaxEncoder(), nil
	case "RL", "RunLengthDecode":
		return core.NewRunLengthEncoder(), nil
	}

	common.Log.Debug("unsupported inline image encoding filter name : %s", *filterName)
	return nil, errors.New("unsupported inline encoding method")
}
// newFlateEncoderFromInlineImage creates a flate encoder/decoder for an inline image.
//
// The encoding parameters are taken from decodeParams when supplied (usually only when a multi
// filter is used); otherwise from the inline image's own DecodeParms entry, which must then be a
// dictionary. Without any decode parameters the encoder is returned with its defaults.
func newFlateEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.FlateEncoder, error) {
	encoder := core.NewFlateEncoder()

	// Fall back to the inline image's DecodeParms if none were passed in.
	if decodeParams == nil && inlineImage.DecodeParms != nil {
		dict, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary)
		if !isDict {
			common.Log.Debug("error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms)
			return nil, fmt.Errorf("invalid DecodeParms")
		}
		decodeParams = dict
	}
	if decodeParams == nil {
		// Everything below depends on the decode params.
		return encoder, nil
	}

	common.Log.Trace("decode params: %s", decodeParams.String())

	if predictorObj := decodeParams.Get("Predictor"); predictorObj != nil {
		predictor, isInt := predictorObj.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error: Predictor specified but not numeric (%T)", predictorObj)
			return nil, fmt.Errorf("invalid Predictor")
		}
		encoder.Predictor = int(*predictor)
	} else {
		common.Log.Debug("error: Predictor missing from DecodeParms - Continue with default (1)")
	}

	// Bits per component. The encoder's default is kept when not specified.
	if bpcObj := decodeParams.Get("BitsPerComponent"); bpcObj != nil {
		bpc, isInt := bpcObj.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error: Invalid BitsPerComponent")
			return nil, fmt.Errorf("invalid BitsPerComponent")
		}
		encoder.BitsPerComponent = int(*bpc)
	}

	// Columns and Colors are only consulted when a predictor is in use.
	if encoder.Predictor <= 1 {
		return encoder, nil
	}

	// Columns (default 1).
	encoder.Columns = 1
	if columnsObj := decodeParams.Get("Columns"); columnsObj != nil {
		columns, isInt := columnsObj.(*core.PdfObjectInteger)
		if !isInt {
			return nil, fmt.Errorf("predictor column invalid")
		}
		encoder.Columns = int(*columns)
	}

	// Colors: number of interleaved color components per sample (default 1).
	encoder.Colors = 1
	if colorsObj := decodeParams.Get("Colors"); colorsObj != nil {
		colors, isInt := colorsObj.(*core.PdfObjectInteger)
		if !isInt {
			return nil, fmt.Errorf("predictor colors not an integer")
		}
		encoder.Colors = int(*colors)
	}

	return encoder, nil
}
// newLZWEncoderFromInlineImage creates an LZW encoder/decoder for an inline image.
//
// The encoding parameters are taken from decodeParams when supplied; otherwise from the inline
// image's own DecodeParms entry, which must then be a dictionary. Without any decode parameters
// the encoder is returned with its defaults.
func newLZWEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.LZWEncoder, error) {
	// Start with default settings.
	encoder := core.NewLZWEncoder()

	// Fall back to the inline image's DecodeParms if none were passed in.
	if decodeParams == nil && inlineImage.DecodeParms != nil {
		dict, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary)
		if !isDict {
			common.Log.Debug("error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms)
			return nil, fmt.Errorf("invalid DecodeParms")
		}
		decodeParams = dict
	}
	if decodeParams == nil {
		// The remaining options all come from the decode params.
		return encoder, nil
	}

	// EarlyChange indicates when to increase the code length, i.e. which LZW variant to use.
	// The default is 1 (one code early).
	//
	// For regular streams it is specified in the stream dictionary; for inline images it is not
	// explicitly specified where to look, so the decode params are checked for now.
	encoder.EarlyChange = 1
	if ecObj := decodeParams.Get("EarlyChange"); ecObj != nil {
		earlyChange, isInt := ecObj.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error: EarlyChange specified but not numeric (%T)", ecObj)
			return nil, fmt.Errorf("invalid EarlyChange")
		}
		switch *earlyChange {
		case 0, 1:
			encoder.EarlyChange = int(*earlyChange)
		default:
			return nil, fmt.Errorf("invalid EarlyChange value (not 0 or 1)")
		}
	}

	if predictorObj := decodeParams.Get("Predictor"); predictorObj != nil {
		predictor, isInt := predictorObj.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error: Predictor specified but not numeric (%T)", predictorObj)
			return nil, fmt.Errorf("invalid Predictor")
		}
		encoder.Predictor = int(*predictor)
	}

	// Bits per component. The encoder's default is kept when not specified.
	if bpcObj := decodeParams.Get("BitsPerComponent"); bpcObj != nil {
		bpc, isInt := bpcObj.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error: Invalid BitsPerComponent")
			return nil, fmt.Errorf("invalid BitsPerComponent")
		}
		encoder.BitsPerComponent = int(*bpc)
	}

	// Columns and Colors are only consulted when a predictor is in use.
	if encoder.Predictor > 1 {
		// Columns (default 1).
		encoder.Columns = 1
		if columnsObj := decodeParams.Get("Columns"); columnsObj != nil {
			columns, isInt := columnsObj.(*core.PdfObjectInteger)
			if !isInt {
				return nil, fmt.Errorf("predictor column invalid")
			}
			encoder.Columns = int(*columns)
		}

		// Colors: number of interleaved color components per sample (default 1).
		encoder.Colors = 1
		if colorsObj := decodeParams.Get("Colors"); colorsObj != nil {
			colors, isInt := colorsObj.(*core.PdfObjectInteger)
			if !isInt {
				return nil, fmt.Errorf("predictor colors not an integer")
			}
			encoder.Colors = int(*colors)
		}
	}

	common.Log.Trace("decode params: %s", decodeParams.String())
	return encoder, nil
}
// newDCTEncoderFromInlineImage creates a DCT encoder/decoder for an inline image.
//
// The bits per component, number of color components, width and height are derived from the
// JPEG header found in the image's stream data. An error is returned when the header cannot be
// decoded or the color model is not one of the handled ones.
func newDCTEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.DCTEncoder, error) {
	// Start with default settings.
	encoder := core.NewDCTEncoder()

	// Only the JPEG configuration (header) is decoded, not the pixel data.
	cfg, err := jpeg.DecodeConfig(bytes.NewReader(inlineImage.stream))
	if err != nil {
		common.Log.Debug("error decoding file: %s", err)
		return nil, err
	}

	var bitsPerComponent, colorComponents int
	switch cfg.ColorModel {
	case gocolor.RGBAModel, gocolor.YCbCrModel:
		// RGBA: alpha is not included in pdf.
		// YCbCr is not supported by PDF, but it could be a different colorspace
		// with 3 components. Would be specified by the ColorSpace entry.
		bitsPerComponent, colorComponents = 8, 3
	case gocolor.RGBA64Model:
		bitsPerComponent, colorComponents = 16, 3
	case gocolor.GrayModel:
		bitsPerComponent, colorComponents = 8, 1
	case gocolor.Gray16Model:
		bitsPerComponent, colorComponents = 16, 1
	case gocolor.CMYKModel:
		bitsPerComponent, colorComponents = 8, 4
	default:
		return nil, errors.New("unsupported color model")
	}

	encoder.BitsPerComponent = bitsPerComponent
	encoder.ColorComponents = colorComponents
	encoder.Width = cfg.Width
	encoder.Height = cfg.Height
	common.Log.Trace("DCT Encoder: %+v", encoder)

	return encoder, nil
}
// newMultiEncoderFromInlineImage creates a multi-filter encoder/decoder for an inline image, getting
// the encoding parameters from the Filter array and the DecodeParms (DP) entry.
//
// DecodeParms is optional. When it is a dictionary it is applied to every filter; when it is an
// array, element i is used for filter i (non-dictionary elements count as "no parameters") and the
// array must then have at least as many elements as there are filters.
//
// Supported filters are Flate, LZW, ASCIIHex and ASCII85, each accepted either fully spelled out
// or as its inline image abbreviation (Fl, LZW, AHx, A85). Any other filter yields an error.
func newMultiEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.MultiEncoder, error) {
	mencoder := core.NewMultiEncoder()

	// Prepare the decode params (one shared dictionary, or one entry per filter).
	// Optional, not always present.
	var sharedParams *core.PdfObjectDictionary
	var perFilterParams []*core.PdfObjectDictionary
	switch dp := inlineImage.DecodeParms.(type) {
	case *core.PdfObjectDictionary:
		// If it is a dictionary, assume it applies to all.
		sharedParams = dp
	case *core.PdfObjectArray:
		// If it is an array, assume there is one for each filter.
		for _, elem := range *dp {
			// A non-dictionary element leaves a nil slot, i.e. no parameters for that filter.
			dict, _ := elem.(*core.PdfObjectDictionary)
			perFilterParams = append(perFilterParams, dict)
		}
	}

	if inlineImage.Filter == nil {
		return nil, fmt.Errorf("filter missing")
	}
	filters, ok := inlineImage.Filter.(*core.PdfObjectArray)
	if !ok {
		return nil, fmt.Errorf("multi filter can only be made from array")
	}

	for idx, filterObj := range *filters {
		name, ok := filterObj.(*core.PdfObjectName)
		if !ok {
			return nil, fmt.Errorf("multi filter array element not a name")
		}

		// If the shared decode params dict is set, use it. Otherwise take from the array, but only
		// if one was provided; oftentimes there are no decode params at all.
		dParams := sharedParams
		if dParams == nil && len(perFilterParams) > 0 {
			if idx >= len(perFilterParams) {
				return nil, fmt.Errorf("missing elements in decode params array")
			}
			dParams = perFilterParams[idx]
		}

		// Abbreviations per Table 94 (PDF32000_2008), consistent with the single-filter path.
		switch *name {
		case core.StreamEncodingFilterNameFlate, "Fl":
			// XXX: need to separate out the DecodeParms..
			encoder, err := newFlateEncoderFromInlineImage(inlineImage, dParams)
			if err != nil {
				return nil, err
			}
			mencoder.AddEncoder(encoder)
		case core.StreamEncodingFilterNameLZW, "LZW":
			encoder, err := newLZWEncoderFromInlineImage(inlineImage, dParams)
			if err != nil {
				return nil, err
			}
			mencoder.AddEncoder(encoder)
		case core.StreamEncodingFilterNameASCIIHex, "AHx":
			mencoder.AddEncoder(core.NewASCIIHexEncoder())
		case core.StreamEncodingFilterNameASCII85, "A85":
			mencoder.AddEncoder(core.NewASCII85Encoder())
		default:
			common.Log.Error("Unsupported filter %s", *name)
			return nil, fmt.Errorf("invalid filter in multi filter array")
		}
	}

	return mencoder, nil
}

View File

@@ -0,0 +1,456 @@
package contentstream
import (
"bytes"
"errors"
"fmt"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)
// ContentStreamInlineImage is a representation of an inline image in a Content stream. Everything between
// the BI and EI operands.
// ContentStreamInlineImage implements the core.PdfObject interface although strictly it is not a PDF object.
//
// Each exported field holds the value of the corresponding inline image dictionary entry as parsed, or nil
// when the entry is absent. The abbreviated key accepted for each entry is noted next to the field.
type ContentStreamInlineImage struct {
BitsPerComponent core.PdfObject // BPC
ColorSpace core.PdfObject // CS
Decode core.PdfObject // D
DecodeParms core.PdfObject // DP
Filter core.PdfObject // F
Height core.PdfObject // H
ImageMask core.PdfObject // IM
Intent core.PdfObject // No abbreviation.
Interpolate core.PdfObject // I
Width core.PdfObject // W
stream []byte // Image data found between ID and EI, still encoded as per Filter.
}
// NewInlineImageFromImage makes a new content stream inline image object from an image.
//
// The image data is encoded with encoder; a nil encoder means raw (unencoded) data. The colorspace
// is chosen from the number of color components (1: gray, 3: RGB, 4: CMYK); any other count is an
// error. The Filter entry is set unless the encoder is the raw encoder.
func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*ContentStreamInlineImage, error) {
	if encoder == nil {
		encoder = core.NewRawEncoder()
	}

	var colorSpace core.PdfObject
	switch img.ColorComponents {
	case 1:
		colorSpace = core.MakeName("G") // G short for DeviceGray
	case 3:
		colorSpace = core.MakeName("RGB") // RGB short for DeviceRGB
	case 4:
		colorSpace = core.MakeName("CMYK") // CMYK short for DeviceCMYK
	default:
		common.Log.Debug("invalid number of color components for inline image: %d", img.ColorComponents)
		return nil, errors.New("invalid number of color components")
	}

	encoded, err := encoder.EncodeBytes(img.Data)
	if err != nil {
		return nil, err
	}

	inlineImage := &ContentStreamInlineImage{
		ColorSpace:       colorSpace,
		BitsPerComponent: core.MakeInteger(img.BitsPerComponent),
		Width:            core.MakeInteger(img.Width),
		Height:           core.MakeInteger(img.Height),
		stream:           encoded,
	}

	if name := encoder.GetFilterName(); name != core.StreamEncodingFilterNameRaw {
		inlineImage.Filter = core.MakeName(name)
	}
	// XXX/FIXME: Add decode params?

	return inlineImage, nil
}
// String returns a human readable, multi-line summary of the inline image: the length of the
// stream data followed by one "- KEY value" line for every entry that is set.
func (si *ContentStreamInlineImage) String() string {
	entries := []struct {
		prefix string
		obj    core.PdfObject
	}{
		{"- BPC ", si.BitsPerComponent},
		{"- CS ", si.ColorSpace},
		{"- D ", si.Decode},
		{"- DP ", si.DecodeParms},
		{"- F ", si.Filter},
		{"- H ", si.Height},
		{"- IM ", si.ImageMask},
		{"- Intent ", si.Intent},
		{"- I ", si.Interpolate},
		{"- W ", si.Width},
	}

	var out bytes.Buffer
	fmt.Fprintf(&out, "InlineImage(len=%d)\n", len(si.stream))
	for _, entry := range entries {
		if entry.obj == nil {
			continue
		}
		out.WriteString(entry.prefix)
		out.WriteString(entry.obj.DefaultWriteString())
		out.WriteString("\n")
	}
	return out.String()
}
// DefaultWriteString serializes the inline image for a content stream: one "/KEY value" line for
// every entry that is set (abbreviated keys), then "ID ", the stream data and a closing "EI".
// The leading "BI" is not written, as that is the operand and is written out separately.
func (si *ContentStreamInlineImage) DefaultWriteString() string {
	entries := []struct {
		prefix string
		obj    core.PdfObject
	}{
		{"/BPC ", si.BitsPerComponent},
		{"/CS ", si.ColorSpace},
		{"/D ", si.Decode},
		{"/DP ", si.DecodeParms},
		{"/F ", si.Filter},
		{"/H ", si.Height},
		{"/IM ", si.ImageMask},
		{"/Intent ", si.Intent},
		{"/I ", si.Interpolate},
		{"/W ", si.Width},
	}

	var output bytes.Buffer

	// Write out the parameters.
	for _, entry := range entries {
		if entry.obj == nil {
			continue
		}
		output.WriteString(entry.prefix)
		output.WriteString(entry.obj.DefaultWriteString())
		output.WriteString("\n")
	}

	// Image data, delimited by ID ... EI.
	output.WriteString("ID ")
	output.Write(si.stream)
	output.WriteString("\nEI\n")

	return output.String()
}
// GetColorSpace resolves the colorspace of the inline image.
//
// A missing ColorSpace entry is treated as DeviceGray. An array entry is handed to
// newIndexedColorspaceFromPdfObject. A name entry is matched against the device colorspaces (full
// or abbreviated names); any other name is looked up among the colorspaces in resources.
// resources may be nil, in which case only the built-in names can be resolved.
//
// Returns an error when the entry is neither a name nor an array, when it names the Indexed
// colorspace directly, or when the name cannot be resolved.
func (s *ContentStreamInlineImage) GetColorSpace(resources *model.PdfPageResources) (model.PdfColorspace, error) {
	if s.ColorSpace == nil {
		// Default.
		common.Log.Debug("Inline image not having specified colorspace, assuming Gray")
		return model.NewPdfColorspaceDeviceGray(), nil
	}

	// If is an array, then could be an indexed colorspace.
	if arr, isArr := s.ColorSpace.(*core.PdfObjectArray); isArr {
		return newIndexedColorspaceFromPdfObject(arr)
	}

	name, ok := s.ColorSpace.(*core.PdfObjectName)
	if !ok {
		common.Log.Debug("error: Invalid object type (%T;%+v)", s.ColorSpace, s.ColorSpace)
		return nil, errors.New("type check error")
	}

	switch *name {
	case "G", "DeviceGray":
		return model.NewPdfColorspaceDeviceGray(), nil
	case "RGB", "DeviceRGB":
		return model.NewPdfColorspaceDeviceRGB(), nil
	case "CMYK", "DeviceCMYK":
		return model.NewPdfColorspaceDeviceCMYK(), nil
	case "I", "Indexed":
		return nil, errors.New("unsupported Index colorspace")
	}

	// Can also refer to a name in the PDF page resources. Guard against missing resources so that
	// an unresolvable name is reported as an error rather than a nil pointer dereference.
	if resources != nil && resources.ColorSpace != nil {
		if cs, has := resources.ColorSpace.Colorspaces[string(*name)]; has {
			return cs, nil
		}
	}

	common.Log.Debug("error, unsupported inline image colorspace: %s", *name)
	return nil, errors.New("unknown colorspace")
}
// GetEncoder returns the stream encoder matching the inline image's Filter and DecodeParms
// entries, as built by newEncoderFromInlineImage.
func (s *ContentStreamInlineImage) GetEncoder() (core.StreamEncoder, error) {
	encoder, err := newEncoderFromInlineImage(s)
	return encoder, err
}
// IsMask reports whether the inline image is an image mask.
// The image mask entry in the image dictionary specifies that the image data shall be used as a stencil
// mask for painting in the current color. The mask data is 1bpc, grayscale.
// A missing ImageMask entry means false; an entry that is not a boolean is an error.
func (s *ContentStreamInlineImage) IsMask() (bool, error) {
	if s.ImageMask == nil {
		return false, nil
	}

	mask, isBool := s.ImageMask.(*core.PdfObjectBool)
	if !isBool {
		common.Log.Debug("Image mask not a boolean")
		return false, errors.New("invalid object type")
	}

	return bool(*mask), nil
}
// ToImage exports the inline image to a model.Image which can be transformed or exported easily.
// Page resources are needed to look up colorspace information.
//
// The stream data is decoded with the encoder derived from the Filter entry. Height and Width are
// required integer entries. An image mask is always 1 bit per component with 1 color component;
// otherwise BitsPerComponent defaults to 8 and the number of color components comes from the
// colorspace (1 when none is specified).
func (si *ContentStreamInlineImage) ToImage(resources *model.PdfPageResources) (*model.Image, error) {
	// Decode the imaging data if encoded.
	encoder, err := newEncoderFromInlineImage(si)
	if err != nil {
		return nil, err
	}
	common.Log.Trace("encoder: %+v %T", encoder, encoder)
	common.Log.Trace("inline image: %+v", si)

	decoded, err := encoder.DecodeBytes(si.stream)
	if err != nil {
		return nil, err
	}

	img := &model.Image{}
	img.Data = decoded

	// Height.
	if si.Height == nil {
		return nil, errors.New("height attribute missing")
	}
	heightObj, isInt := si.Height.(*core.PdfObjectInteger)
	if !isInt {
		return nil, errors.New("invalid height")
	}
	img.Height = int64(*heightObj)

	// Width.
	if si.Width == nil {
		return nil, errors.New("width attribute missing")
	}
	widthObj, isInt := si.Width.(*core.PdfObjectInteger)
	if !isInt {
		return nil, errors.New("invalid width")
	}
	img.Width = int64(*widthObj)

	// Image mask?
	isMask, err := si.IsMask()
	if err != nil {
		return nil, err
	}
	if isMask {
		// Masks are grayscale 1bpc.
		img.BitsPerComponent = 1
		img.ColorComponents = 1
		return img, nil
	}

	// BPC.
	if si.BitsPerComponent != nil {
		bpc, isInt := si.BitsPerComponent.(*core.PdfObjectInteger)
		if !isInt {
			common.Log.Debug("error invalid bits per component value, type %T", si.BitsPerComponent)
			return nil, errors.New("BPC Type error")
		}
		img.BitsPerComponent = int64(*bpc)
	} else {
		common.Log.Debug("Inline Bits per component missing - assuming 8")
		img.BitsPerComponent = 8
	}

	// Color components.
	if si.ColorSpace == nil {
		// Default gray if not specified.
		common.Log.Debug("Inline Image colorspace not specified - assuming 1 color component")
		img.ColorComponents = 1
		return img, nil
	}
	cs, err := si.GetColorSpace(resources)
	if err != nil {
		return nil, err
	}
	img.ColorComponents = cs.GetNumComponents()

	return img, nil
}
// ParseInlineImage parses an inline image from a content stream, both reading its properties and binary data.
// When called, "BI" has already been read from the stream. This function
// finishes reading through "EI" and then returns the ContentStreamInlineImage.
//
// Returns an error when a property key is not a name, a property value is missing (an operand is found
// instead), a property key is not recognized, an operand has an unexpected type, or the underlying reader
// fails (including reaching the end of the data before the terminating "EI").
func (s *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, error) {
// Reading parameters.
im := ContentStreamInlineImage{}
for {
// NOTE(review): the error returned by skipSpaces is discarded here; a read failure will
// resurface through the parseObject call below.
s.skipSpaces()
obj, err, isOperand := s.parseObject()
if err != nil {
return nil, err
}
if !isOperand {
// Not an operand.. Read key value properties..
param, ok := obj.(*core.PdfObjectName)
if !ok {
common.Log.Debug("invalid inline image property (expecting name) - %T", obj)
return nil, fmt.Errorf("invalid inline image property (expecting name) - %T", obj)
}
// The isOperand declared here shadows the outer one within this block only; the outer
// value (false) is what the "if isOperand" check further down sees.
valueObj, err, isOperand := s.parseObject()
if err != nil {
return nil, err
}
if isOperand {
return nil, fmt.Errorf("not expecting an operand")
}
// From 8.9.7 "Inline Images" p. 223 (PDF32000_2008):
// The key-value pairs appearing between the BI and ID operators are analogous to those in the dictionary
// portion of an image XObject (though the syntax is different).
// Table 93 shows the entries that are valid for an inline image, all of which shall have the same meanings
// as in a stream dictionary (see Table 5) or an image dictionary (see Table 89).
// Entries other than those listed shall be ignored; in particular, the Type, Subtype, and Length
// entries normally found in a stream or image dictionary are unnecessary.
// For convenience, the abbreviations shown in the table may be used in place of the fully spelled-out keys.
// Table 94 shows additional abbreviations that can be used for the names of colour spaces and filters.
//
// NOTE(review): unlisted entries are rejected with an error below rather than ignored as the
// quoted text describes - confirm whether that strictness is intended.
switch *param {
case "BPC", "BitsPerComponent":
im.BitsPerComponent = valueObj
case "CS", "ColorSpace":
im.ColorSpace = valueObj
case "D", "Decode":
im.Decode = valueObj
case "DP", "DecodeParms":
im.DecodeParms = valueObj
case "F", "Filter":
im.Filter = valueObj
case "H", "Height":
im.Height = valueObj
case "IM", "ImageMask":
im.ImageMask = valueObj
case "Intent":
im.Intent = valueObj
case "I", "Interpolate":
im.Interpolate = valueObj
case "W", "Width":
im.Width = valueObj
default:
return nil, fmt.Errorf("unknown inline image parameter %s", *param)
}
}
if isOperand {
operand, ok := obj.(*core.PdfObjectString)
if !ok {
return nil, fmt.Errorf("failed to read inline image - invalid operand")
}
// Operands other than EI and ID fall through the switch and the outer loop continues.
switch *operand {
case "EI":
// Image fully defined (EI seen without a preceding ID, so no image data was read).
common.Log.Trace("Inline image finished...")
return &im, nil
case "ID":
// Inline image data.
// Should get a single space (0x20) followed by the data and then EI.
common.Log.Trace("ID start")
// Skip the space if its there.
b, err := s.reader.Peek(1)
if err != nil {
return nil, err
}
if core.IsWhiteSpace(b[0]) {
s.reader.Discard(1)
}
// Unfortunately there is no good way to know how many bytes to read since it
// depends on the Filter and encoding etc.
// Therefore we will simply read until we find "<ws>EI<ws>" where <ws> is whitespace
// although of course that could be a part of the data (even if unlikely).
//
// State machine over the bytes read:
//   0: inside image data
//   1: a whitespace byte has been seen (candidate start of the terminator)
//   2: whitespace followed by 'E' has been seen
//   3: whitespace followed by "EI" has been seen
// skipBytes holds the bytes of the current terminator candidate; they are appended to
// the image data if the candidate turns out not to be the terminator.
im.stream = []byte{}
state := 0
var skipBytes []byte
for {
c, err := s.reader.ReadByte()
if err != nil {
common.Log.Debug("Unable to find end of image EI in inline image data")
return nil, err
}
switch state {
case 0:
if core.IsWhiteSpace(c) {
// Possible start of the terminator: hold the byte back.
skipBytes = []byte{}
skipBytes = append(skipBytes, c)
state = 1
} else {
im.stream = append(im.stream, c)
}
case 1:
skipBytes = append(skipBytes, c)
if c == 'E' {
state = 2
} else {
// Not a terminator: the held bytes (including c) are image data.
im.stream = append(im.stream, skipBytes...)
skipBytes = []byte{} // Clear.
// Need an extra check to decide if we fall back to state 0 or 1.
if core.IsWhiteSpace(c) {
state = 1
} else {
state = 0
}
}
case 2:
skipBytes = append(skipBytes, c)
if c == 'I' {
state = 3
} else {
// Not a terminator: the held bytes (including c) are image data.
im.stream = append(im.stream, skipBytes...)
skipBytes = []byte{} // Clear.
state = 0
}
case 3:
skipBytes = append(skipBytes, c)
if core.IsWhiteSpace(c) {
// image data finished. The held "<ws>EI<ws>" bytes are not part of the data.
if len(im.stream) > 100 {
common.Log.Trace("Image stream (%d): % x ...", len(im.stream), im.stream[:100])
} else {
common.Log.Trace("Image stream (%d): % x", len(im.stream), im.stream)
}
// Exit point.
return &im, nil
} else {
// Seems like "<ws>EI" was part of the data.
im.stream = append(im.stream, skipBytes...)
skipBytes = []byte{} // Clear.
state = 0
}
}
}
// Never reached (exit point is at end of EI).
}
}
}
}

View File

@@ -0,0 +1,586 @@
package contentstream
import (
"bufio"
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
"strconv"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
)
// ContentStreamParser parses PDF content streams from a buffered reader.
type ContentStreamParser struct {
	reader *bufio.Reader
}

// NewContentStreamParser creates a new instance of the content stream parser from an input content
// stream string.
// Each command in the stream has parameters and an operand (command).
func NewContentStreamParser(contentStr string) *ContentStreamParser {
	// A newline is added at the end to get the last operand without an EOF error.
	source := bytes.NewBufferString(contentStr + "\n")
	return &ContentStreamParser{reader: bufio.NewReader(source)}
}
// Parse parses all commands in the content stream, returning a list of operation data.
//
// Objects are collected as parameters until an operand is met, which completes the operation.
// After a "BI" operand the inline image up to "EI" is parsed and stored as the operation's
// parameter. Reaching the end of the data is the successful exit; on any other error the
// operations parsed so far are returned together with the error.
func (sp *ContentStreamParser) Parse() (*ContentStreamOperations, error) {
	operations := ContentStreamOperations{}

	for {
		op := &ContentStreamOperation{}

		// Gather parameters until the operand shows up.
		for {
			obj, err, isOperand := sp.parseObject()
			if err != nil {
				if err == io.EOF {
					// End of data. Successful exit point.
					return &operations, nil
				}
				return &operations, err
			}
			if !isOperand {
				op.Params = append(op.Params, obj)
				continue
			}

			op.Operand = string(*obj.(*core.PdfObjectString))
			operations = append(operations, op)
			break
		}

		if op.Operand != "BI" {
			continue
		}

		// Parse an inline image, reads everything between the "BI" and "EI".
		// The image is stored as the parameter.
		im, err := sp.ParseInlineImage()
		if err != nil {
			return &operations, err
		}
		op.Params = append(op.Params, im)
	}
}
// skipSpaces consumes consecutive whitespace bytes from the reader.
// It returns how many bytes were skipped. If peeking fails, the count is
// reported as 0 together with the error.
func (sp *ContentStreamParser) skipSpaces() (int, error) {
	skipped := 0
	for {
		next, err := sp.reader.Peek(1)
		if err != nil {
			return 0, err
		}
		if !core.IsWhiteSpace(next[0]) {
			return skipped, nil
		}
		sp.reader.ReadByte()
		skipped++
	}
}
// skipComments consumes leading whitespace and any number of consecutive
// '%' comments. It stops, without error, at the first non-whitespace byte
// that does not start a comment.
func (sp *ContentStreamParser) skipComments() error {
	for {
		if _, err := sp.skipSpaces(); err != nil {
			return err
		}

		head, err := sp.reader.Peek(1)
		if err != nil {
			common.Log.Debug("error %s", err.Error())
			return err
		}
		if head[0] != '%' {
			// Not a comment clearly.
			return nil
		}

		// Drop everything up to (not including) the end of the line; the
		// next round's skipSpaces takes care of the line break itself.
		for {
			next, err := sp.reader.Peek(1)
			if err != nil {
				common.Log.Debug("error %s", err.Error())
				return err
			}
			if next[0] == '\r' || next[0] == '\n' {
				break
			}
			sp.reader.ReadByte()
		}
	}
}
// parseName reads a name object, which must begin with '/'.
//
// The leading '/' is consumed and is not part of the returned name. Bytes are
// then accumulated until whitespace or one of '/', '[', '(', ']', '<', '>' is
// peeked; that terminating byte is left unread. A '#' starts a "#xx" escape
// whose two hexadecimal digits are decoded into one byte.
//
// Hitting EOF ends the name without an error. If the first byte is not '/',
// an "invalid name" error is returned.
func (sp *ContentStreamParser) parseName() (core.PdfObjectName, error) {
	name := ""
	nameStarted := false
	for {
		bb, err := sp.reader.Peek(1)
		if err == io.EOF {
			break // Can happen when loading from object stream.
		}
		if err != nil {
			return core.PdfObjectName(name), err
		}
		if !nameStarted {
			// Should always start with '/', otherwise not valid.
			if bb[0] == '/' {
				nameStarted = true
				sp.reader.ReadByte()
			} else {
				common.Log.Error("Name starting with %s (% x)", bb, bb)
				return core.PdfObjectName(name), fmt.Errorf("invalid name: (%c)", bb[0])
			}
		} else {
			if core.IsWhiteSpace(bb[0]) {
				break
			} else if (bb[0] == '/') || (bb[0] == '[') || (bb[0] == '(') || (bb[0] == ']') || (bb[0] == '<') || (bb[0] == '>') {
				break // Looks like start of next statement.
			} else if bb[0] == '#' {
				// "#xx" escape: peek all three bytes first so nothing is
				// consumed if the input is truncated.
				hexcode, err := sp.reader.Peek(3)
				if err != nil {
					return core.PdfObjectName(name), err
				}
				sp.reader.Discard(3)
				code, err := hex.DecodeString(string(hexcode[1:3]))
				if err != nil {
					return core.PdfObjectName(name), err
				}
				name += string(code)
			} else {
				// NOTE: string(b) on a single byte yields the UTF-8 encoding
				// of that code point, so a byte >= 0x80 is appended as two
				// bytes here, whereas the "#xx" path above appends the raw
				// decoded byte.
				b, _ := sp.reader.ReadByte()
				name += string(b)
			}
		}
	}
	return core.PdfObjectName(name), nil
}
// parseNumber reads a numeric object (PDF 32000-1, section 7.3.3) and returns
// either an integer or a float object.
//
// An integer is one or more decimal digits optionally preceded by a sign.
// A real value is one or more decimal digits with an optional sign and a
// leading, trailing, or embedded PERIOD (decimal point).
//
// The specification forbids conforming writers from emitting exponential
// notation (such as 6.02E23), but such numbers do occur in practice, so they
// are accepted here with either a lowercase 'e' or an uppercase 'E'. Because
// several content stream operators start with 'E' (ET, EI, EMC, EX), an
// uppercase 'E' is only taken as an exponent marker when it is directly
// followed by a digit or a sign; otherwise it is left for the next token.
//
// EOF terminates the number like any other delimiter. If the collected text
// cannot be converted, the problem is logged and a zero value is returned
// with a nil error. A non-EOF read error is returned with a nil object.
func (sp *ContentStreamParser) parseNumber() (core.PdfObject, error) {
	isFloat := false
	allowSigns := true
	var num []byte

	// exponentFollows reports whether the byte after the currently peeked
	// one is a digit or a sign, i.e. whether an exponent value follows.
	exponentFollows := func() bool {
		ahead, _ := sp.reader.Peek(2)
		if len(ahead) < 2 {
			return false
		}
		return core.IsDecimalDigit(ahead[1]) || ahead[1] == '-' || ahead[1] == '+'
	}

scan:
	for {
		common.Log.Trace("Parsing number \"%s\"", num)
		bb, err := sp.reader.Peek(1)
		if err == io.EOF {
			// GH: EOF handling. Handle EOF like end of line. Can happen with
			// encoded object streams that the object is at the end.
			// In other cases, we will get the EOF error elsewhere at any rate.
			break
		}
		if err != nil {
			common.Log.Error("error %s", err)
			return nil, err
		}

		c := bb[0]
		switch {
		case allowSigns && (c == '-' || c == '+'):
			// Signs are only accepted at the start and right after an
			// exponent marker.
			allowSigns = false
		case core.IsDecimalDigit(c):
			// Plain digit, nothing else to track.
		case c == '.':
			isFloat = true
		case c == 'e' || (c == 'E' && exponentFollows()):
			// Exponential number format.
			isFloat = true
			allowSigns = true
		default:
			break scan
		}

		sp.reader.ReadByte()
		num = append(num, c)
	}

	numStr := string(num)
	if isFloat {
		fVal, err := strconv.ParseFloat(numStr, 64)
		if err != nil {
			common.Log.Debug("error parsing number %q err=%v. Using 0.0. Output may be incorrect", numStr, err)
			fVal = 0.0
		}
		o := core.PdfObjectFloat(fVal)
		return &o, nil
	}

	intVal, err := strconv.ParseInt(numStr, 10, 64)
	if err != nil {
		common.Log.Debug("error parsing integer %q err=%v. Using 0. Output may be incorrect", numStr, err)
		intVal = 0
	}
	o := core.PdfObjectInteger(intVal)
	return &o, nil
}
// parseString reads a literal string, which starts with '(' and ends with the
// matching ')'. Balanced, unescaped parentheses inside the string are kept as
// part of its content.
//
// Escape handling follows PDF 32000-1, section 7.3.4.2:
//   - \n \r \t \b \f \( \) \\ map to the corresponding single byte,
//   - \ddd (one to three octal digits) maps to the byte with that code,
//   - a backslash directly followed by an end-of-line marker (CR, LF or
//     CRLF) is a line continuation and contributes nothing,
//   - for any other character the backslash is ignored and the character
//     itself is kept.
//
// On a read error the bytes collected so far are returned with the error.
func (sp *ContentStreamParser) parseString() (core.PdfObjectString, error) {
	// Skip the opening '('.
	sp.reader.ReadByte()

	buf := []byte{}
	depth := 1
	for {
		bb, err := sp.reader.Peek(1)
		if err != nil {
			return core.PdfObjectString(buf), err
		}

		if bb[0] == '\\' { // Escape sequence.
			sp.reader.ReadByte() // Skip the escape \ byte.
			b, err := sp.reader.ReadByte()
			if err != nil {
				return core.PdfObjectString(buf), err
			}

			// Octal '\ddd' number (base 8).
			if core.IsOctalDigit(b) {
				ahead, err := sp.reader.Peek(2)
				if err != nil {
					return core.PdfObjectString(buf), err
				}

				numeric := []byte{b}
				for _, val := range ahead {
					if !core.IsOctalDigit(val) {
						break
					}
					numeric = append(numeric, val)
				}
				// The first digit was already consumed above.
				sp.reader.Discard(len(numeric) - 1)

				common.Log.Trace("Numeric string \"%s\"", numeric)
				code, err := strconv.ParseUint(string(numeric), 8, 32)
				if err != nil {
					return core.PdfObjectString(buf), err
				}
				// High-order overflow (codes above 0377) is discarded.
				buf = append(buf, byte(code))
				continue
			}

			switch b {
			case 'n':
				buf = append(buf, '\n')
			case 'r':
				buf = append(buf, '\r')
			case 't':
				buf = append(buf, '\t')
			case 'b':
				buf = append(buf, '\b')
			case 'f':
				buf = append(buf, '\f')
			case '\n':
				// Line continuation: nothing is added to the string.
			case '\r':
				// Line continuation; for CRLF also swallow the LF.
				if next, err := sp.reader.Peek(1); err == nil && next[0] == '\n' {
					sp.reader.ReadByte()
				}
			default:
				// Covers '(', ')' and '\\' as well as unknown escapes: the
				// backslash is dropped and the character itself is kept.
				buf = append(buf, b)
			}
			continue
		}

		switch bb[0] {
		case '(':
			depth++
		case ')':
			depth--
			if depth == 0 {
				// Closing parenthesis of the string itself.
				sp.reader.ReadByte()
				return core.PdfObjectString(buf), nil
			}
		}

		b, _ := sp.reader.ReadByte()
		buf = append(buf, b)
	}
}
// parseHexString reads a hexadecimal string, which starts with '<' and ends
// with '>'. Whitespace and any byte that is not a hexadecimal digit are
// skipped. An odd number of digits is padded with a trailing '0'.
func (sp *ContentStreamParser) parseHexString() (core.PdfObjectString, error) {
	// Skip the opening '<'.
	sp.reader.ReadByte()

	hexChars := []byte("0123456789abcdefABCDEF")
	digits := []byte{}
	for {
		sp.skipSpaces()

		if _, err := sp.reader.Peek(1); err != nil {
			return core.PdfObjectString(""), err
		}

		c, _ := sp.reader.ReadByte()
		if c == '>' {
			break
		}
		if bytes.IndexByte(hexChars, c) < 0 {
			continue
		}
		digits = append(digits, c)
	}

	if len(digits)%2 == 1 {
		digits = append(digits, '0')
	}

	decoded, _ := hex.DecodeString(string(digits))
	return core.PdfObjectString(decoded), nil
}
// parseArray reads an array, which starts with '[' and ends with ']'.
// Elements are parsed with parseObject, so any kind of direct object may
// appear. On error the elements collected so far are returned with it.
func (sp *ContentStreamParser) parseArray() (core.PdfObjectArray, error) {
	items := make(core.PdfObjectArray, 0)

	// Skip the opening '['.
	sp.reader.ReadByte()

	for {
		sp.skipSpaces()

		next, err := sp.reader.Peek(1)
		if err != nil {
			return items, err
		}
		if next[0] == ']' {
			sp.reader.ReadByte()
			return items, nil
		}

		elem, err, _ := sp.parseObject()
		if err != nil {
			return items, err
		}
		items = append(items, elem)
	}
}
// parseBool reads the keyword "true" or "false" and returns the matching
// bool object. Nothing is consumed when neither keyword is next in the
// input; in that case an error is returned.
func (sp *ContentStreamParser) parseBool() (core.PdfObjectBool, error) {
	head, err := sp.reader.Peek(4)
	if err != nil {
		return core.PdfObjectBool(false), err
	}
	if string(head) == "true" {
		sp.reader.Discard(4)
		return core.PdfObjectBool(true), nil
	}

	head, err = sp.reader.Peek(5)
	if err != nil {
		return core.PdfObjectBool(false), err
	}
	if string(head) == "false" {
		sp.reader.Discard(5)
		return core.PdfObjectBool(false), nil
	}

	return core.PdfObjectBool(false), errors.New("unexpected boolean string")
}
// parseNull consumes four bytes (the length of the keyword "null") without
// inspecting them and returns a null object. The caller is expected to have
// checked that the keyword is actually next in the input.
func (sp *ContentStreamParser) parseNull() (core.PdfObjectNull, error) {
	if _, err := sp.reader.Discard(4); err != nil {
		return core.PdfObjectNull{}, err
	}
	return core.PdfObjectNull{}, nil
}
// parseDict reads a dictionary delimited by "<<" and ">>".
//
// Each entry is a name key followed by a value parsed with parseObject.
// A nil dictionary is returned together with any error, including the input
// ending before the closing ">>".
//
// Some writers emit a null value glued to the key without a separating space
// (for example "/Boundsnull"). When a key ends in "null" and the next token
// starts another name, the key is stripped of that suffix and stored with a
// null value.
func (sp *ContentStreamParser) parseDict() (*core.PdfObjectDictionary, error) {
	common.Log.Trace("Reading content stream dict!")

	dict := core.MakeDict()

	// Pass the '<<'
	c, _ := sp.reader.ReadByte()
	if c != '<' {
		return nil, errors.New("invalid dict")
	}
	c, _ = sp.reader.ReadByte()
	if c != '<' {
		return nil, errors.New("invalid dict")
	}

	for {
		sp.skipSpaces()

		bb, err := sp.reader.Peek(2)
		if err != nil {
			return nil, err
		}

		common.Log.Trace("Dict peek: %s (% x)!", string(bb), string(bb))
		if (bb[0] == '>') && (bb[1] == '>') {
			common.Log.Trace("EOF dictionary")
			sp.reader.ReadByte()
			sp.reader.ReadByte()
			break
		}
		common.Log.Trace("Parse the name!")

		keyName, err := sp.parseName()
		common.Log.Trace("Key: %s", keyName)
		if err != nil {
			common.Log.Debug("error Returning name err %s", err)
			return nil, err
		}

		if len(keyName) > 4 && keyName[len(keyName)-4:] == "null" {
			// Some writers have a bug where the null is appended without
			// space. For example "\Boundsnull"
			newKey := keyName[0 : len(keyName)-4]
			common.Log.Trace("Taking care of null bug (%s)", keyName)
			common.Log.Trace("New key \"%s\" = null", newKey)
			sp.skipSpaces()

			// The peek can fail (e.g. at EOF), in which case no byte is
			// available to index; report the error instead of panicking.
			next, err := sp.reader.Peek(1)
			if err != nil {
				return nil, err
			}
			if next[0] == '/' {
				dict.Set(newKey, core.MakeNull())
				continue
			}
		}

		sp.skipSpaces()

		val, err, _ := sp.parseObject()
		if err != nil {
			return nil, err
		}
		dict.Set(keyName, val)

		common.Log.Trace("dict[%s] = %s", keyName, val.String())
	}

	return dict, nil
}
// parseOperand reads an operand: a word naming a content stream command.
// Bytes are collected until a delimiter or whitespace byte is peeked; that
// byte is left unread. On a read error the bytes collected so far are
// returned together with the error.
func (sp *ContentStreamParser) parseOperand() (core.PdfObjectString, error) {
	token := []byte{}
	for {
		next, err := sp.reader.Peek(1)
		if err != nil {
			return core.PdfObjectString(token), err
		}
		if core.IsDelimiter(next[0]) || core.IsWhiteSpace(next[0]) {
			break
		}

		c, _ := sp.reader.ReadByte()
		token = append(token, c)
	}

	return core.PdfObjectString(token), nil
}
// parseObject reads the next object from the stream.
//
// It returns the object, an error, and a flag telling whether the object is
// an operand. An operand is returned as a pdf string object. The kind of
// object is decided from the next two bytes:
//
//	%            comment (skipped, then parsing resumes)
//	/            name
//	(            literal string
//	< (not <<)   hexadecimal string
//	<<           dictionary
//	[            array
//	digit, '.', or a '-'/'+' sign followed by one of those: number
//
// Anything else is one of the keywords "null", "true", "false", or an
// operand. An empty operand yields ErrInvalidOperand.
func (sp *ContentStreamParser) parseObject() (core.PdfObject, error, bool) {
	sp.skipSpaces()
	for {
		bb, err := sp.reader.Peek(2)
		if err != nil {
			return nil, err, false
		}

		common.Log.Trace("Peek string: %s", string(bb))
		// Determine type.
		switch {
		case bb[0] == '%':
			if err := sp.skipComments(); err != nil {
				return nil, err, false
			}
			continue
		case bb[0] == '/':
			name, err := sp.parseName()
			common.Log.Trace("->Name: '%s'", name)
			return &name, err, false
		case bb[0] == '(':
			common.Log.Trace("->String!")
			str, err := sp.parseString()
			common.Log.Trace("(%s)\n", str.String())
			return &str, err, false
		case bb[0] == '<' && bb[1] != '<':
			common.Log.Trace("->Hex String!")
			str, err := sp.parseHexString()
			return &str, err, false
		case bb[0] == '[':
			common.Log.Trace("->Array!")
			arr, err := sp.parseArray()
			return &arr, err, false
		case core.IsFloatDigit(bb[0]) || ((bb[0] == '-' || bb[0] == '+') && core.IsFloatDigit(bb[1])):
			// parseNumber accepts both signs, so both must be routed to it.
			common.Log.Trace("->Number!")
			number, err := sp.parseNumber()
			return number, err, false
		case bb[0] == '<' && bb[1] == '<':
			dict, err := sp.parseDict()
			return dict, err, false
		}

		// Otherwise, can be: keyword such as "null", "false", "true" or an operand...
		common.Log.Trace("->Operand or bool?")
		// Let's peek farther to find out. A short read is fine here; the
		// length checks below cover it.
		bb, _ = sp.reader.Peek(5)
		peekStr := string(bb)
		common.Log.Trace("cont Peek str: %s", peekStr)

		if (len(peekStr) > 3) && (peekStr[:4] == "null") {
			null, err := sp.parseNull()
			return &null, err, false
		} else if (len(peekStr) > 4) && (peekStr[:5] == "false") {
			b, err := sp.parseBool()
			return &b, err, false
		} else if (len(peekStr) > 3) && (peekStr[:4] == "true") {
			b, err := sp.parseBool()
			return &b, err, false
		}

		operand, err := sp.parseOperand()
		if err != nil {
			return &operand, err, false
		}
		if len(operand.String()) < 1 {
			return &operand, ErrInvalidOperand, false
		}
		return &operand, nil, true
	}
}

View File

@@ -0,0 +1,541 @@
package contentstream
import (
"errors"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)
// GraphicsState is a basic graphics state implementation.
// It currently tracks only the color-related part of the state: the current
// colorspace and color for stroking and for non-stroking operations.
// More fields can be added as needed.
type GraphicsState struct {
	// Colorspaces currently selected for stroking / non-stroking operations.
	ColorspaceStroking    model.PdfColorspace
	ColorspaceNonStroking model.PdfColorspace
	// Colors currently selected for stroking / non-stroking operations.
	ColorStroking    model.PdfColor
	ColorNonStroking model.PdfColor
}
// GraphicStateStack is a stack of graphics states; Push appends to the end of
// the slice and Pop removes from the end.
type GraphicStateStack []GraphicsState
// Push places gs on top of the stack.
func (gsStack *GraphicStateStack) Push(gs GraphicsState) {
	grown := append(*gsStack, gs)
	*gsStack = grown
}
// Pop removes the top graphics state from the stack and returns it.
// It panics when the stack is empty, so callers must check the length first.
func (gsStack *GraphicStateStack) Pop() GraphicsState {
	top := len(*gsStack) - 1
	state := (*gsStack)[top]
	*gsStack = (*gsStack)[:top]
	return state
}
// ContentStreamProcessor defines a data structure and methods for processing a content stream, keeping track of the
// current graphics state, and allowing external handlers to define their own functions as a part of the processing,
// for example rendering or extracting certain information.
type ContentStreamProcessor struct {
	// graphicsStack holds the states saved by "q" and restored by "Q".
	graphicsStack GraphicStateStack
	// operations is the list of operations that Process walks through.
	operations []*ContentStreamOperation
	// graphicsState is the state in effect at the current operation.
	graphicsState GraphicsState

	// handlers are the externally registered callbacks, invoked in
	// registration order.
	handlers []HandlerEntry
	// currentIndex is initialised to 0 by the constructor.
	// NOTE(review): no other use is visible in this part of the file.
	currentIndex int
}
// HandlerFunc is the signature of an external handler. It receives the
// operation being processed, a copy of the graphics state in effect after the
// processor's own handling of that operation, and the page resources.
// Returning a non-nil error aborts processing.
type HandlerFunc func(op *ContentStreamOperation, gs GraphicsState, resources *model.PdfPageResources) error
// HandlerEntry is one registered handler together with the condition that
// decides for which operations it is invoked.
type HandlerEntry struct {
	// Condition selects between "every operation" and "one specific operand".
	Condition HandlerConditionEnum
	// Operand is the operand to match when Condition is
	// HandlerConditionEnumOperand.
	Operand string
	// Handler is the callback to invoke.
	Handler HandlerFunc
}
// HandlerConditionEnum tells when a registered handler should be invoked.
type HandlerConditionEnum int
// All reports whether the condition selects every operand.
func (ce HandlerConditionEnum) All() bool {
	return HandlerConditionEnumAllOperands == ce
}
// Operand reports whether the condition selects one specific operand.
func (ce HandlerConditionEnum) Operand() bool {
	return HandlerConditionEnumOperand == ce
}
// Conditions under which a registered handler is invoked.
const (
	// HandlerConditionEnumOperand invokes the handler only for operations
	// whose operand equals the one registered with it.
	HandlerConditionEnumOperand HandlerConditionEnum = iota
	// HandlerConditionEnumAllOperands invokes the handler for every operation.
	HandlerConditionEnumAllOperands
)
// NewContentStreamProcessor returns a processor for the given operations,
// with an empty graphics state stack, a zero-valued graphics state and no
// handlers registered.
func NewContentStreamProcessor(ops []*ContentStreamOperation) *ContentStreamProcessor {
	return &ContentStreamProcessor{
		graphicsStack: GraphicStateStack{},
		graphicsState: GraphicsState{},
		handlers:      []HandlerEntry{},
		currentIndex:  0,
		operations:    ops,
	}
}
// AddHandler registers handler to be called during Process. With
// HandlerConditionEnumAllOperands it is called for every operation; with
// HandlerConditionEnumOperand only for operations whose operand equals
// operand. Handlers run in the order they were added.
func (csp *ContentStreamProcessor) AddHandler(condition HandlerConditionEnum, operand string, handler HandlerFunc) {
	csp.handlers = append(csp.handlers, HandlerEntry{
		Condition: condition,
		Operand:   operand,
		Handler:   handler,
	})
}
// getColorspace resolves a colorspace name used in a content stream.
//
// Lookup order:
//  1. the device colorspaces and Pattern, which need no resources,
//  2. the colorspaces defined in the page resources,
//  3. CalGray, CalRGB and Lab with default parameters.
//
// An "unsupported colorspace" error is returned when nothing matches.
func (csp *ContentStreamProcessor) getColorspace(name string, resources *model.PdfPageResources) (model.PdfColorspace, error) {
	switch name {
	case "DeviceGray":
		return model.NewPdfColorspaceDeviceGray(), nil
	case "DeviceRGB":
		return model.NewPdfColorspaceDeviceRGB(), nil
	case "DeviceCMYK":
		return model.NewPdfColorspaceDeviceCMYK(), nil
	case "Pattern":
		return model.NewPdfColorspaceSpecialPattern(), nil
	}

	// Next check the colorspace dictionary. Without resources there is
	// nothing to look up, so only the built-in names can resolve.
	// NOTE(review): resources.ColorSpace is still dereferenced unchecked;
	// confirm that it is always populated for non-nil resources.
	if resources != nil {
		if cs, has := resources.ColorSpace.Colorspaces[name]; has {
			return cs, nil
		}
	}

	// Lastly check other potential colormaps.
	switch name {
	case "CalGray":
		return model.NewPdfColorspaceCalGray(), nil
	case "CalRGB":
		return model.NewPdfColorspaceCalRGB(), nil
	case "Lab":
		return model.NewPdfColorspaceLab(), nil
	}

	// Otherwise unsupported.
	common.Log.Debug("Unknown colorspace requested: %s", name)
	return nil, errors.New("unsupported colorspace")
}
// getInitialColor returns the color that becomes current when the given
// colorspace is selected.
//
// Device and CIE-based spaces start at their zero color (black; for
// DeviceCMYK that is 0,0,0,1). Spaces that are defined in terms of another
// space (ICCBased, Indexed, Separation, DeviceN) delegate to that space. An
// ICCBased space without an alternate falls back to the device space with the
// same number of components. A Pattern space has no initial color, so nil is
// returned without an error.
func (csp *ContentStreamProcessor) getInitialColor(cs model.PdfColorspace) (model.PdfColor, error) {
	switch cs := cs.(type) {
	case *model.PdfColorspaceDeviceGray:
		return model.NewPdfColorDeviceGray(0.0), nil
	case *model.PdfColorspaceDeviceRGB:
		return model.NewPdfColorDeviceRGB(0.0, 0.0, 0.0), nil
	case *model.PdfColorspaceDeviceCMYK:
		return model.NewPdfColorDeviceCMYK(0.0, 0.0, 0.0, 1.0), nil
	case *model.PdfColorspaceCalGray:
		return model.NewPdfColorCalGray(0.0), nil
	case *model.PdfColorspaceCalRGB:
		return model.NewPdfColorCalRGB(0.0, 0.0, 0.0), nil
	case *model.PdfColorspaceLab:
		// All components start at 0.0 unless that lies outside the range
		// allowed for the component, in which case the nearest valid value
		// is used.
		// Assumes Range is laid out as [amin amax bmin bmax], as in the PDF
		// Lab colorspace dictionary — TODO confirm against the model package.
		nearestToZero := func(lo, hi float64) float64 {
			switch {
			case lo > 0:
				return lo
			case hi < 0:
				return hi
			}
			return 0.0
		}

		a, b := 0.0, 0.0
		if len(cs.Range) >= 4 {
			a = nearestToZero(cs.Range[0], cs.Range[1])
			b = nearestToZero(cs.Range[2], cs.Range[3])
		}
		return model.NewPdfColorLab(0.0, a, b), nil
	case *model.PdfColorspaceICCBased:
		if cs.Alternate == nil {
			// Alternate not defined.
			// Try to fall back to DeviceGray, DeviceRGB or DeviceCMYK.
			common.Log.Trace("ICC Based not defined - attempting fall back (N = %d)", cs.N)
			switch cs.N {
			case 1:
				common.Log.Trace("Falling back to DeviceGray")
				return csp.getInitialColor(model.NewPdfColorspaceDeviceGray())
			case 3:
				common.Log.Trace("Falling back to DeviceRGB")
				return csp.getInitialColor(model.NewPdfColorspaceDeviceRGB())
			case 4:
				common.Log.Trace("Falling back to DeviceCMYK")
				return csp.getInitialColor(model.NewPdfColorspaceDeviceCMYK())
			default:
				return nil, errors.New("alternate space not defined for ICC")
			}
		}
		return csp.getInitialColor(cs.Alternate)
	case *model.PdfColorspaceSpecialIndexed:
		if cs.Base == nil {
			return nil, errors.New("indexed base not specified")
		}
		return csp.getInitialColor(cs.Base)
	case *model.PdfColorspaceSpecialSeparation:
		if cs.AlternateSpace == nil {
			return nil, errors.New("alternate space not specified")
		}
		return csp.getInitialColor(cs.AlternateSpace)
	case *model.PdfColorspaceDeviceN:
		if cs.AlternateSpace == nil {
			return nil, errors.New("alternate space not specified")
		}
		return csp.getInitialColor(cs.AlternateSpace)
	case *model.PdfColorspaceSpecialPattern:
		// FIXME/check: A pattern does not have an initial color...
		return nil, nil
	}

	common.Log.Debug("Unable to determine initial color for unknown colorspace: %T", cs)
	return nil, errors.New("unsupported colorspace")
}
// Process runs through all operations in order.
//
// For each operation the processor first updates its own graphics state
// (q/Q and the color operators) and then calls every registered handler whose
// condition matches, passing the updated state. Processing stops at the first
// error from either the internal handling or an external handler.
//
// The graphics state is reset to DeviceGray black for both stroking and
// non-stroking at the start of each call. Nil operations are skipped. A "Q"
// with no saved state to restore is logged and skipped, handlers included,
// since there is nothing valid to pop.
func (ce *ContentStreamProcessor) Process(resources *model.PdfPageResources) error {
	// Initialize graphics state
	ce.graphicsState.ColorspaceStroking = model.NewPdfColorspaceDeviceGray()
	ce.graphicsState.ColorspaceNonStroking = model.NewPdfColorspaceDeviceGray()
	ce.graphicsState.ColorStroking = model.NewPdfColorDeviceGray(0)
	ce.graphicsState.ColorNonStroking = model.NewPdfColorDeviceGray(0)

	for _, op := range ce.operations {
		if op == nil {
			continue
		}

		var err error

		// Internal handling.
		switch op.Operand {
		case "q":
			ce.graphicsStack.Push(ce.graphicsState)
		case "Q":
			if len(ce.graphicsStack) == 0 {
				// Unbalanced Q: popping would index an empty slice.
				common.Log.Debug("invalid Q operator: graphics state stack is empty, skipping")
				continue
			}
			ce.graphicsState = ce.graphicsStack.Pop()

		// Color operations (Table 74 p. 179)
		case "CS":
			err = ce.handleCommand_CS(op, resources)
		case "cs":
			err = ce.handleCommand_cs(op, resources)
		case "SC":
			err = ce.handleCommand_SC(op)
		case "SCN":
			err = ce.handleCommand_SCN(op)
		case "sc":
			err = ce.handleCommand_sc(op)
		case "scn":
			err = ce.handleCommand_scn(op)
		case "G":
			err = ce.handleCommand_G(op)
		case "g":
			err = ce.handleCommand_g(op)
		case "RG":
			err = ce.handleCommand_RG(op)
		case "rg":
			err = ce.handleCommand_rg(op)
		case "K":
			err = ce.handleCommand_K(op)
		case "k":
			err = ce.handleCommand_k(op)
		}
		if err != nil {
			common.Log.Debug("Processor handling error (%s): %v", op.Operand, err)
			common.Log.Debug("Operand: %#v", op.Operand)
			return err
		}

		// Check if have external handler also, and process if so.
		for _, entry := range ce.handlers {
			var err error
			if entry.Condition.All() {
				err = entry.Handler(op, ce.graphicsState, resources)
			} else if entry.Condition.Operand() && op.Operand == entry.Operand {
				err = entry.Handler(op, ce.graphicsState, resources)
			}
			if err != nil {
				common.Log.Debug("Processor handler error: %v", err)
				return err
			}
		}
	}

	return nil
}
// handleCommand_CS handles the CS operator: it sets the current colorspace
// for stroking operations and resets the stroking color to that colorspace's
// initial color.
//
// The operator takes exactly one parameter, the colorspace name, which is
// either a device colorspace or a name defined in the resources. An error is
// returned when the parameter count or type is wrong, or when the colorspace
// or its initial color cannot be determined.
func (csp *ContentStreamProcessor) handleCommand_CS(op *ContentStreamOperation, resources *model.PdfPageResources) error {
	if len(op.Params) < 1 {
		common.Log.Debug("invalid CS command: too few parameters")
		return errors.New("too few parameters")
	}
	if len(op.Params) > 1 {
		common.Log.Debug("invalid CS command: too many parameters")
		return errors.New("too many parameters")
	}
	name, ok := op.Params[0].(*core.PdfObjectName)
	if !ok {
		common.Log.Debug("error: CS command with invalid parameter")
		return errors.New("type check error")
	}

	// Set the current color space to use for stroking operations.
	// Either device based or referring to resource dict.
	cs, err := csp.getColorspace(string(*name), resources)
	if err != nil {
		return err
	}
	csp.graphicsState.ColorspaceStroking = cs

	// Set initial color.
	color, err := csp.getInitialColor(cs)
	if err != nil {
		return err
	}
	csp.graphicsState.ColorStroking = color

	return nil
}
// handleCommand_cs handles the cs operator: it sets the current colorspace
// for non-stroking operations and resets the non-stroking color to that
// colorspace's initial color.
//
// The operator takes exactly one parameter, the colorspace name, which is
// either a device colorspace or a name defined in the resources. An error is
// returned when the parameter count or type is wrong, or when the colorspace
// or its initial color cannot be determined.
func (csp *ContentStreamProcessor) handleCommand_cs(op *ContentStreamOperation, resources *model.PdfPageResources) error {
	if len(op.Params) < 1 {
		common.Log.Debug("invalid cs command: too few parameters")
		return errors.New("too few parameters")
	}
	if len(op.Params) > 1 {
		common.Log.Debug("invalid cs command: too many parameters")
		return errors.New("too many parameters")
	}
	name, ok := op.Params[0].(*core.PdfObjectName)
	if !ok {
		common.Log.Debug("error: cs command with invalid parameter")
		return errors.New("type check error")
	}

	// Set the current color space to use for non-stroking operations.
	// Either device based or referring to resource dict.
	cs, err := csp.getColorspace(string(*name), resources)
	if err != nil {
		return err
	}
	csp.graphicsState.ColorspaceNonStroking = cs

	// Set initial color.
	color, err := csp.getInitialColor(cs)
	if err != nil {
		return err
	}
	csp.graphicsState.ColorNonStroking = color

	return nil
}
// handleCommand_SC handles the SC operator: it sets the stroking color in the
// current stroking colorspace (device, CIE-based or Indexed; not ICC based).
//
// The number of parameters must equal the number of components of that
// colorspace: one for DeviceGray, CalGray and Indexed, three for DeviceRGB,
// CalRGB and Lab.
func (sp *ContentStreamProcessor) handleCommand_SC(op *ContentStreamOperation) error {
	strokeCS := sp.graphicsState.ColorspaceStroking

	if got, want := len(op.Params), strokeCS.GetNumComponents(); got != want {
		common.Log.Debug("invalid number of parameters for SC")
		common.Log.Debug("Number %d not matching colorspace %T", got, strokeCS)
		return errors.New("invalid number of parameters")
	}

	strokeColor, err := strokeCS.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}
	sp.graphicsState.ColorStroking = strokeColor

	return nil
}
// isPatternCS reports whether cs is the special Pattern colorspace.
func isPatternCS(cs model.PdfColorspace) bool {
	switch cs.(type) {
	case *model.PdfColorspaceSpecialPattern:
		return true
	default:
		return false
	}
}
// handleCommand_SCN handles the SCN operator: like SC, but it also supports
// Pattern, Separation, DeviceN and ICCBased colorspaces.
//
// Unless the current stroking colorspace is a Pattern, the number of
// parameters must equal its number of components.
func (sp *ContentStreamProcessor) handleCommand_SCN(op *ContentStreamOperation) error {
	cs := sp.graphicsState.ColorspaceStroking

	if !isPatternCS(cs) {
		if len(op.Params) != cs.GetNumComponents() {
			common.Log.Debug("invalid number of parameters for SCN")
			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
			return errors.New("invalid number of parameters")
		}
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorStroking = color

	return nil
}
// handleCommand_sc handles the sc operator: like SC, but for non-stroking
// operations.
//
// Unless the current non-stroking colorspace is a Pattern, the number of
// parameters must equal its number of components.
func (sp *ContentStreamProcessor) handleCommand_sc(op *ContentStreamOperation) error {
	cs := sp.graphicsState.ColorspaceNonStroking

	if !isPatternCS(cs) {
		if len(op.Params) != cs.GetNumComponents() {
			common.Log.Debug("invalid number of parameters for sc")
			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
			return errors.New("invalid number of parameters")
		}
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorNonStroking = color

	return nil
}
// handleCommand_scn handles the scn operator: like SCN, but for non-stroking
// operations.
//
// Unless the current non-stroking colorspace is a Pattern, the number of
// parameters must equal its number of components.
func (sp *ContentStreamProcessor) handleCommand_scn(op *ContentStreamOperation) error {
	cs := sp.graphicsState.ColorspaceNonStroking

	if !isPatternCS(cs) {
		if len(op.Params) != cs.GetNumComponents() {
			common.Log.Debug("invalid number of parameters for scn")
			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
			return errors.New("invalid number of parameters")
		}
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		common.Log.Debug("error: Fail to get color from params: %+v (CS is %+v)", op.Params, cs)
		return err
	}

	sp.graphicsState.ColorNonStroking = color

	return nil
}
// handleCommand_G handles the G operator: it sets the stroking colorspace to
// DeviceGray and the stroking color to the given gray level (range [0-1]).
//
//	gray G
func (sp *ContentStreamProcessor) handleCommand_G(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceGray()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for G")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceStroking = cs
	sp.graphicsState.ColorStroking = color

	return nil
}
// handleCommand_g handles the g operator: like G, but it sets the
// non-stroking colorspace and color (range [0-1]).
//
//	gray g
func (sp *ContentStreamProcessor) handleCommand_g(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceGray()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for g")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceNonStroking = cs
	sp.graphicsState.ColorNonStroking = color

	return nil
}
// handleCommand_RG handles the RG operator: it sets the stroking colorspace
// to DeviceRGB and the stroking color to r,g,b (each in range [0-1]).
//
//	r g b RG
func (sp *ContentStreamProcessor) handleCommand_RG(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceRGB()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for RG")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceStroking = cs
	sp.graphicsState.ColorStroking = color

	return nil
}
// handleCommand_rg handles the rg operator: like RG, but it sets the
// non-stroking colorspace and color.
//
//	r g b rg
func (sp *ContentStreamProcessor) handleCommand_rg(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceRGB()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for rg")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceNonStroking = cs
	sp.graphicsState.ColorNonStroking = color

	return nil
}
// handleCommand_K handles the K operator: it sets the stroking colorspace to
// DeviceCMYK and the stroking color to c,m,y,k (each in range [0-1]).
//
//	c m y k K
func (sp *ContentStreamProcessor) handleCommand_K(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceCMYK()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for K")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceStroking = cs
	sp.graphicsState.ColorStroking = color

	return nil
}
// handleCommand_k handles the k operator: like K, but it sets the
// non-stroking colorspace and color.
//
//	c m y k k
func (sp *ContentStreamProcessor) handleCommand_k(op *ContentStreamOperation) error {
	cs := model.NewPdfColorspaceDeviceCMYK()
	if len(op.Params) != cs.GetNumComponents() {
		common.Log.Debug("invalid number of parameters for k")
		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
		return errors.New("invalid number of parameters")
	}

	color, err := cs.ColorFromPdfObjects(op.Params)
	if err != nil {
		return err
	}

	sp.graphicsState.ColorspaceNonStroking = cs
	sp.graphicsState.ColorNonStroking = color

	return nil
}

View File

@@ -0,0 +1,90 @@
package contentstream
import (
"errors"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/model"
)
// makeParamsFromFloats wraps each value in a float object and returns them
// as an operation parameter list, in the same order.
func makeParamsFromFloats(vals []float64) []core.PdfObject {
	params := make([]core.PdfObject, len(vals))
	for i := range vals {
		params[i] = core.MakeFloat(vals[i])
	}
	return params
}
// makeParamsFromNames wraps each value in a name object and returns them as
// an operation parameter list, in the same order.
func makeParamsFromNames(vals []core.PdfObjectName) []core.PdfObject {
	params := make([]core.PdfObject, len(vals))
	for i := range vals {
		params[i] = core.MakeName(string(vals[i]))
	}
	return params
}
// makeParamsFromStrings wraps each value in a string object and returns them
// as an operation parameter list, in the same order.
func makeParamsFromStrings(vals []core.PdfObjectString) []core.PdfObject {
	params := make([]core.PdfObject, len(vals))
	for i := range vals {
		params[i] = core.MakeString(string(vals[i]))
	}
	return params
}
// makeParamsFromInts wraps each value in an integer object and returns them
// as an operation parameter list, in the same order.
func makeParamsFromInts(vals []int64) []core.PdfObject {
	params := make([]core.PdfObject, len(vals))
	for i := range vals {
		params[i] = core.MakeInteger(vals[i])
	}
	return params
}
// newIndexedColorspaceFromPdfObject builds an Indexed colorspace from an
// array of the form
//
//	[/I base hival lookup]   or   [/Indexed base hival lookup]
//
// where base is one of /G, /RGB, /CMYK or the corresponding full names
// /DeviceGray, /DeviceRGB, /DeviceCMYK. Abbreviated names are expanded to
// their full form before the array is handed to the model package.
//
// A "type check error" is returned when obj is not an array or one of the
// first two elements is not a name; a "range check error" when the array
// does not have four elements or a name has an unexpected value.
func newIndexedColorspaceFromPdfObject(obj core.PdfObject) (model.PdfColorspace, error) {
	arr, isArray := obj.(*core.PdfObjectArray)
	if !isArray {
		common.Log.Debug("error: Invalid indexed cs not in array (%#v)", obj)
		return nil, errors.New("type check error")
	}

	elems := *arr
	if len(elems) != 4 {
		common.Log.Debug("error: Invalid cs array, length != 4 (%d)", len(elems))
		return nil, errors.New("range check error")
	}

	// Format is [/I base 255 bytes], where base = /G,/RGB,/CMYK
	family, isName := elems[0].(*core.PdfObjectName)
	if !isName {
		common.Log.Debug("error: Invalid cs array first element not a name (array: %#v)", elems)
		return nil, errors.New("type check error")
	}
	if !(*family == "I" || *family == "Indexed") {
		common.Log.Debug("error: Invalid cs array first element != I (got: %v)", *family)
		return nil, errors.New("range check error")
	}

	// Check base
	base, isName := elems[1].(*core.PdfObjectName)
	if !isName {
		common.Log.Debug("error: Invalid cs array 2nd element not a name (array: %#v)", elems)
		return nil, errors.New("type check error")
	}

	var basename string
	switch *base {
	case "G", "DeviceGray":
		basename = "DeviceGray"
	case "RGB", "DeviceRGB":
		basename = "DeviceRGB"
	case "CMYK", "DeviceCMYK":
		basename = "DeviceCMYK"
	default:
		common.Log.Debug("error: Invalid cs array 2nd element != G/RGB/CMYK (got: %v)", *base)
		return nil, errors.New("range check error")
	}

	// Prepare to a format that can be loaded by model's newPdfColorspaceFromPdfObject.
	csArr := core.MakeArray(core.MakeName("Indexed"), core.MakeName(basename), elems[2], elems[3])

	return model.NewPdfColorspaceFromPdfObject(csArr)
}