some improvements

2026-01-01 11:00:23 +01:00
parent 8f313c00f0
commit ef0778c8b3
20 changed files with 305 additions and 385 deletions
--- a/internal/pdf/core/crossrefs.go
+++ b/internal/pdf/core/crossrefs.go
@@ -10,21 +10,18 @@ import (
 	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
 )

-// TODO (v3): Create a new type xrefType which can be an integer and can be used for improved type checking.
-// TODO (v3): Unexport these constants and rename with camelCase.
 const (
-	// XREF_TABLE_ENTRY indicates a normal xref table entry.
-	XREF_TABLE_ENTRY = iota
+	// xRefTableEntry indicates a normal xref table entry.
+	xRefTableEntry = iota

-	// XREF_OBJECT_STREAM indicates an xref entry in an xref object stream.
-	XREF_OBJECT_STREAM = iota
+	// xRefObjectStream indicates an xref entry in an xref object stream.
+	xRefObjectStream = iota
 )

 // XrefObject defines a cross reference entry which is a map between object number (with generation number) and the
 // location of the actual object, either as a file offset (xref table entry), or as a location within an xref
 // stream object (xref object stream).
-// TODO (v3): Unexport.
-type XrefObject struct {
+type xRefObject struct {
 	xtype        int
 	objectNumber int
 	generation   int
@@ -36,32 +33,28 @@ type XrefObject struct {
 }

 // XrefTable is a map between object number and corresponding XrefObject.
-// TODO (v3): Unexport.
-// TODO: Consider changing to a slice, so can maintain the object order without sorting when analyzing.
-type XrefTable map[int]XrefObject
+type xRefTable map[int]xRefObject

 // ObjectStream represents an object stream's information which can contain multiple indirect objects.
 // The information specifies the number of objects and has information about offset locations for
 // each object.
-// TODO (v3): Unexport.
-type ObjectStream struct {
-	N       int // TODO (v3): Unexport.
+type objectStream struct {
+	n       int
 	ds      []byte
 	offsets map[int]int64
 }

 // ObjectStreams defines a map between object numbers (object streams only) and underlying ObjectStream information.
-type ObjectStreams map[int]ObjectStream
+type ObjectStreams map[int]objectStream

 // ObjectCache defines a map between object numbers and corresponding PdfObject. Serves as a cache for PdfObjects that
 // have already been parsed.
-// TODO (v3): Unexport.
-type ObjectCache map[int]PdfObject
+type objectCache map[int]PdfObject

 // Get an object from an object stream.
 func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject, error) {
 	var bufReader *bytes.Reader
-	var objstm ObjectStream
+	var objstm objectStream
 	var cached bool

 	objstm, cached = parser.objstms[sobjNumber]
@@ -149,7 +142,7 @@ func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObjec
 			offsets[int(*onum)] = int64(*firstOffset + *offset)
 		}

-		objstm = ObjectStream{N: int(*N), ds: ds, offsets: offsets}
+		objstm = objectStream{n: int(*N), ds: ds, offsets: offsets}
 		parser.objstms[sobjNumber] = objstm
 	} else {
 		// Temporarily change the reader object to this decoded buffer.
@@ -246,7 +239,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf

 	common.Log.Trace("Lookup obj number %d", objNumber)
 	switch xref.xtype {
-	case XREF_TABLE_ENTRY:
+	case xRefTableEntry:
 		common.Log.Trace("xrefobj obj num %d", xref.objectNumber)
 		common.Log.Trace("xrefobj gen %d", xref.generation)
 		common.Log.Trace("xrefobj offset %d", xref.offset)
@@ -283,7 +276,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
 					return nil, false, err
 				}
 				// Empty the cache.
-				parser.ObjCache = ObjectCache{}
+				parser.ObjCache = objectCache{}
 				// Try looking up again and return.
 				return parser.lookupByNumberWrapper(objNumber, false)
 			}
@@ -292,7 +285,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
 		common.Log.Trace("Returning obj")
 		parser.ObjCache[objNumber] = obj
 		return obj, false, nil
-	case XREF_OBJECT_STREAM:
+	case xRefObjectStream:
 		common.Log.Trace("xref from object stream!")
 		common.Log.Trace(">Load via OS!")
 		common.Log.Trace("Object stream available in object %d/%d", xref.osObjNumber, xref.osObjIndex)
@@ -361,7 +354,7 @@ func (parser *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
 	return o, nil
 }

-func printXrefTable(xrefTable XrefTable) {
+func printXrefTable(xrefTable xRefTable) {
 	common.Log.Debug("=X=X=X=")
 	common.Log.Debug("Xref table:")
 	i := 0
--- a/internal/pdf/core/parser.go
+++ b/internal/pdf/core/parser.go
@@ -35,10 +35,10 @@ type PdfParser struct {
 	rs               io.ReadSeeker
 	reader           *bufio.Reader
 	fileSize         int64
-	xrefs            XrefTable
+	xrefs            xRefTable
 	objstms          ObjectStreams
 	trailer          *PdfObjectDictionary
-	ObjCache         ObjectCache // TODO: Unexport (v3).
+	ObjCache         objectCache
 	crypter          *PdfCrypt
 	repairsAttempted bool // Avoid multiple attempts for repair.

@@ -740,8 +740,8 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
 				// would be marked as free.  But can still happen!
 				x, ok := parser.xrefs[curObjNum]
 				if !ok || gen > x.generation {
-					obj := XrefObject{objectNumber: curObjNum,
-						xtype:  XREF_TABLE_ENTRY,
+					obj := xRefObject{objectNumber: curObjNum,
+						xtype:  xRefTableEntry,
 						offset: first, generation: gen}
 					parser.xrefs[curObjNum] = obj
 				}
@@ -1000,16 +1000,16 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict
 			if xr, ok := parser.xrefs[objNum]; !ok || int(n3) > xr.generation {
 				// Only overload if not already loaded!
 				// or has a newer generation number. (should not happen)
-				obj := XrefObject{objectNumber: objNum,
-					xtype: XREF_TABLE_ENTRY, offset: n2, generation: int(n3)}
+				obj := xRefObject{objectNumber: objNum,
+					xtype: xRefTableEntry, offset: n2, generation: int(n3)}
 				parser.xrefs[objNum] = obj
 			}
 		case 2:
 			// Object type 2: Compressed object.
 			common.Log.Trace("- In use - compressed object")
 			if _, ok := parser.xrefs[objNum]; !ok {
-				obj := XrefObject{objectNumber: objNum,
-					xtype: XREF_OBJECT_STREAM, osObjNumber: int(n2), osObjIndex: int(n3)}
+				obj := xRefObject{objectNumber: objNum,
+					xtype: xRefObjectStream, osObjNumber: int(n2), osObjIndex: int(n3)}
 				parser.xrefs[objNum] = obj
 				common.Log.Trace("entry: %s", parser.xrefs[objNum])
 			}
@@ -1128,7 +1128,7 @@ func (parser *PdfParser) seekToEOFMarker(fSize int64) error {
 // The earlier xrefs have higher precedence.  If objects already
 // loaded will ignore older versions.
 func (parser *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
-	parser.xrefs = make(XrefTable)
+	parser.xrefs = make(xRefTable)
 	parser.objstms = make(ObjectStreams)

 	// Get the file size.
@@ -1482,30 +1482,13 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) {
 	return &indirect, nil
 }

-// For testing purposes.
-// TODO: Unexport (v3) or move to test files, if needed by external test cases.
-func NewParserFromString(txt string) *PdfParser {
-	parser := PdfParser{}
-	buf := []byte(txt)
-
-	bufReader := bytes.NewReader(buf)
-	parser.rs = bufReader
-
-	bufferedReader := bufio.NewReader(bufReader)
-	parser.reader = bufferedReader
-
-	parser.fileSize = int64(len(txt))
-
-	return &parser
-}
-
 // NewParser creates a new parser for a PDF file via ReadSeeker. Loads the cross reference stream and trailer.
 // An error is returned on failure.
 func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
 	parser := &PdfParser{}

 	parser.rs = rs
-	parser.ObjCache = make(ObjectCache)
+	parser.ObjCache = make(objectCache)
 	parser.streamLengthReferenceLookupInProgress = map[int64]bool{}

 	// Start by reading the xrefs (from bottom).
--- a/internal/pdf/core/primitives.go
+++ b/internal/pdf/core/primitives.go
@@ -3,6 +3,7 @@ package core
 import (
 	"bytes"
 	"fmt"
+	"strings"

 	"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
 )
@@ -315,15 +316,16 @@ func (array *PdfObjectArray) String() string {

 // DefaultWriteString outputs the object as it is to be written to file.
 func (array *PdfObjectArray) DefaultWriteString() string {
-	outStr := "["
+	var outStr strings.Builder
+	outStr.WriteString("[")
 	for ind, o := range *array {
-		outStr += o.DefaultWriteString()
+		outStr.WriteString(o.DefaultWriteString())
 		if ind < (len(*array) - 1) {
-			outStr += " "
+			outStr.WriteString(" ")
 		}
 	}
-	outStr += "]"
-	return outStr
+	outStr.WriteString("]")
+	return outStr.String()
 }

 // Append adds an PdfObject to the array.
--- a/internal/pdf/core/repairs.go
+++ b/internal/pdf/core/repairs.go
@@ -45,7 +45,7 @@ func (parser *PdfParser) repairLocateXref() (int64, error) {
 // Useful when the cross reference is pointing to an object with the wrong number.
 // Update the table.
 func (parser *PdfParser) rebuildXrefTable() error {
-	newXrefs := XrefTable{}
+	newXrefs := xRefTable{}
 	for objNum, xref := range parser.xrefs {
 		obj, _, err := parser.lookupByNumberWrapper(objNum, false)
 		if err != nil {
@@ -92,7 +92,7 @@ func parseObjectNumberFromString(str string) (int, int, error) {
 // Parse the entire file from top down.
 // Goes through the file byte-by-byte looking for "<num> <generation> obj" patterns.
 // N.B. This collects the XREF_TABLE_ENTRY data only.
-func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
+func (parser *PdfParser) repairRebuildXrefsTopDown() (*xRefTable, error) {
 	if parser.repairsAttempted {
 		// Avoid multiple repairs (only try once).
 		return nil, fmt.Errorf("repair failed")
@@ -107,7 +107,7 @@ func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
 	bufLen := 20
 	last := make([]byte, bufLen)

-	xrefTable := XrefTable{}
+	xrefTable := xRefTable{}
 	for {
 		b, err := parser.reader.ReadByte()
 		if err != nil {
@@ -164,8 +164,8 @@ func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
 			// Create and insert the XREF entry if not existing, or the generation number is higher.
 			if curXref, has := xrefTable[objNum]; !has || curXref.generation < genNum {
 				// Make the entry for the cross ref table.
-				xrefEntry := XrefObject{}
-				xrefEntry.xtype = XREF_TABLE_ENTRY
+				xrefEntry := xRefObject{}
+				xrefEntry.xtype = xRefTableEntry
 				xrefEntry.objectNumber = int(objNum)
 				xrefEntry.generation = int(genNum)
 				xrefEntry.offset = objOffset
--- a/internal/pdf/core/symbols.go
+++ b/internal/pdf/core/symbols.go
@@ -49,7 +49,6 @@ func IsPrintable(char byte) bool {
 }

 // IsDelimiter checks if a character represents a delimiter.
-// TODO (v3): Unexport.
 func IsDelimiter(char byte) bool {
 	if char == '(' || char == ')' {
 		return true