some improvements
This commit is contained in:
@@ -10,21 +10,18 @@ import (
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// TODO (v3): Create a new type xrefType which can be an integer and can be used for improved type checking.
|
||||
// TODO (v3): Unexport these constants and rename with camelCase.
|
||||
const (
|
||||
// XREF_TABLE_ENTRY indicates a normal xref table entry.
|
||||
XREF_TABLE_ENTRY = iota
|
||||
// xRefTableEntry indicates a normal xref table entry.
|
||||
xRefTableEntry = iota
|
||||
|
||||
// XREF_OBJECT_STREAM indicates an xref entry in an xref object stream.
|
||||
XREF_OBJECT_STREAM = iota
|
||||
// xRefObjectStream indicates an xref entry in an xref object stream.
|
||||
xRefObjectStream = iota
|
||||
)
|
||||
|
||||
// xRefObject defines a cross reference entry which is a map between object number (with generation number) and the
|
||||
// location of the actual object, either as a file offset (xref table entry), or as a location within an xref
|
||||
// stream object (xref object stream).
|
||||
// TODO (v3): Unexport.
|
||||
type XrefObject struct {
|
||||
type xRefObject struct {
|
||||
xtype int
|
||||
objectNumber int
|
||||
generation int
|
||||
@@ -36,32 +33,28 @@ type XrefObject struct {
|
||||
}
|
||||
|
||||
// xRefTable is a map between object number and corresponding xRefObject.
|
||||
// TODO (v3): Unexport.
|
||||
// TODO: Consider changing to a slice, so can maintain the object order without sorting when analyzing.
|
||||
type XrefTable map[int]XrefObject
|
||||
type xRefTable map[int]xRefObject
|
||||
|
||||
// objectStream represents an object stream's information which can contain multiple indirect objects.
|
||||
// The information specifies the number of objects and has information about offset locations for
|
||||
// each object.
|
||||
// TODO (v3): Unexport.
|
||||
type ObjectStream struct {
|
||||
N int // TODO (v3): Unexport.
|
||||
type objectStream struct {
|
||||
n int
|
||||
ds []byte
|
||||
offsets map[int]int64
|
||||
}
|
||||
|
||||
// ObjectStreams defines a map between object numbers (object streams only) and underlying objectStream information.
|
||||
type ObjectStreams map[int]ObjectStream
|
||||
type ObjectStreams map[int]objectStream
|
||||
|
||||
// objectCache defines a map between object numbers and corresponding PdfObject. Serves as a cache for PdfObjects that
|
||||
// have already been parsed.
|
||||
// TODO (v3): Unexport.
|
||||
type ObjectCache map[int]PdfObject
|
||||
type objectCache map[int]PdfObject
|
||||
|
||||
// Get an object from an object stream.
|
||||
func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject, error) {
|
||||
var bufReader *bytes.Reader
|
||||
var objstm ObjectStream
|
||||
var objstm objectStream
|
||||
var cached bool
|
||||
|
||||
objstm, cached = parser.objstms[sobjNumber]
|
||||
@@ -149,7 +142,7 @@ func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObjec
|
||||
offsets[int(*onum)] = int64(*firstOffset + *offset)
|
||||
}
|
||||
|
||||
objstm = ObjectStream{N: int(*N), ds: ds, offsets: offsets}
|
||||
objstm = objectStream{n: int(*N), ds: ds, offsets: offsets}
|
||||
parser.objstms[sobjNumber] = objstm
|
||||
} else {
|
||||
// Temporarily change the reader object to this decoded buffer.
|
||||
@@ -246,7 +239,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
|
||||
|
||||
common.Log.Trace("Lookup obj number %d", objNumber)
|
||||
switch xref.xtype {
|
||||
case XREF_TABLE_ENTRY:
|
||||
case xRefTableEntry:
|
||||
common.Log.Trace("xrefobj obj num %d", xref.objectNumber)
|
||||
common.Log.Trace("xrefobj gen %d", xref.generation)
|
||||
common.Log.Trace("xrefobj offset %d", xref.offset)
|
||||
@@ -283,7 +276,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
|
||||
return nil, false, err
|
||||
}
|
||||
// Empty the cache.
|
||||
parser.ObjCache = ObjectCache{}
|
||||
parser.ObjCache = objectCache{}
|
||||
// Try looking up again and return.
|
||||
return parser.lookupByNumberWrapper(objNumber, false)
|
||||
}
|
||||
@@ -292,7 +285,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
|
||||
common.Log.Trace("Returning obj")
|
||||
parser.ObjCache[objNumber] = obj
|
||||
return obj, false, nil
|
||||
case XREF_OBJECT_STREAM:
|
||||
case xRefObjectStream:
|
||||
common.Log.Trace("xref from object stream!")
|
||||
common.Log.Trace(">Load via OS!")
|
||||
common.Log.Trace("Object stream available in object %d/%d", xref.osObjNumber, xref.osObjIndex)
|
||||
@@ -361,7 +354,7 @@ func (parser *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
|
||||
return o, nil
|
||||
}
|
||||
|
||||
func printXrefTable(xrefTable XrefTable) {
|
||||
func printXrefTable(xrefTable xRefTable) {
|
||||
common.Log.Debug("=X=X=X=")
|
||||
common.Log.Debug("Xref table:")
|
||||
i := 0
|
||||
|
||||
@@ -35,10 +35,10 @@ type PdfParser struct {
|
||||
rs io.ReadSeeker
|
||||
reader *bufio.Reader
|
||||
fileSize int64
|
||||
xrefs XrefTable
|
||||
xrefs xRefTable
|
||||
objstms ObjectStreams
|
||||
trailer *PdfObjectDictionary
|
||||
ObjCache ObjectCache // TODO: Unexport (v3).
|
||||
ObjCache objectCache
|
||||
crypter *PdfCrypt
|
||||
repairsAttempted bool // Avoid multiple attempts for repair.
|
||||
|
||||
@@ -740,8 +740,8 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
|
||||
// would be marked as free. But can still happen!
|
||||
x, ok := parser.xrefs[curObjNum]
|
||||
if !ok || gen > x.generation {
|
||||
obj := XrefObject{objectNumber: curObjNum,
|
||||
xtype: XREF_TABLE_ENTRY,
|
||||
obj := xRefObject{objectNumber: curObjNum,
|
||||
xtype: xRefTableEntry,
|
||||
offset: first, generation: gen}
|
||||
parser.xrefs[curObjNum] = obj
|
||||
}
|
||||
@@ -1000,16 +1000,16 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict
|
||||
if xr, ok := parser.xrefs[objNum]; !ok || int(n3) > xr.generation {
|
||||
// Only overload if not already loaded!
|
||||
// or has a newer generation number. (should not happen)
|
||||
obj := XrefObject{objectNumber: objNum,
|
||||
xtype: XREF_TABLE_ENTRY, offset: n2, generation: int(n3)}
|
||||
obj := xRefObject{objectNumber: objNum,
|
||||
xtype: xRefTableEntry, offset: n2, generation: int(n3)}
|
||||
parser.xrefs[objNum] = obj
|
||||
}
|
||||
case 2:
|
||||
// Object type 2: Compressed object.
|
||||
common.Log.Trace("- In use - compressed object")
|
||||
if _, ok := parser.xrefs[objNum]; !ok {
|
||||
obj := XrefObject{objectNumber: objNum,
|
||||
xtype: XREF_OBJECT_STREAM, osObjNumber: int(n2), osObjIndex: int(n3)}
|
||||
obj := xRefObject{objectNumber: objNum,
|
||||
xtype: xRefObjectStream, osObjNumber: int(n2), osObjIndex: int(n3)}
|
||||
parser.xrefs[objNum] = obj
|
||||
common.Log.Trace("entry: %s", parser.xrefs[objNum])
|
||||
}
|
||||
@@ -1128,7 +1128,7 @@ func (parser *PdfParser) seekToEOFMarker(fSize int64) error {
|
||||
// The earlier xrefs have higher precedence. If objects already
|
||||
// loaded will ignore older versions.
|
||||
func (parser *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
|
||||
parser.xrefs = make(XrefTable)
|
||||
parser.xrefs = make(xRefTable)
|
||||
parser.objstms = make(ObjectStreams)
|
||||
|
||||
// Get the file size.
|
||||
@@ -1482,30 +1482,13 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) {
|
||||
return &indirect, nil
|
||||
}
|
||||
|
||||
// For testing purposes.
|
||||
// TODO: Unexport (v3) or move to test files, if needed by external test cases.
|
||||
func NewParserFromString(txt string) *PdfParser {
|
||||
parser := PdfParser{}
|
||||
buf := []byte(txt)
|
||||
|
||||
bufReader := bytes.NewReader(buf)
|
||||
parser.rs = bufReader
|
||||
|
||||
bufferedReader := bufio.NewReader(bufReader)
|
||||
parser.reader = bufferedReader
|
||||
|
||||
parser.fileSize = int64(len(txt))
|
||||
|
||||
return &parser
|
||||
}
|
||||
|
||||
// NewParser creates a new parser for a PDF file via ReadSeeker. Loads the cross reference stream and trailer.
|
||||
// An error is returned on failure.
|
||||
func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
|
||||
parser := &PdfParser{}
|
||||
|
||||
parser.rs = rs
|
||||
parser.ObjCache = make(ObjectCache)
|
||||
parser.ObjCache = make(objectCache)
|
||||
parser.streamLengthReferenceLookupInProgress = map[int64]bool{}
|
||||
|
||||
// Start by reading the xrefs (from bottom).
|
||||
|
||||
@@ -3,6 +3,7 @@ package core
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
@@ -315,15 +316,16 @@ func (array *PdfObjectArray) String() string {
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (array *PdfObjectArray) DefaultWriteString() string {
|
||||
outStr := "["
|
||||
var outStr strings.Builder
|
||||
outStr.WriteString("[")
|
||||
for ind, o := range *array {
|
||||
outStr += o.DefaultWriteString()
|
||||
outStr.WriteString(o.DefaultWriteString())
|
||||
if ind < (len(*array) - 1) {
|
||||
outStr += " "
|
||||
outStr.WriteString(" ")
|
||||
}
|
||||
}
|
||||
outStr += "]"
|
||||
return outStr
|
||||
outStr.WriteString("]")
|
||||
return outStr.String()
|
||||
}
|
||||
|
||||
// Append adds a PdfObject to the array.
|
||||
|
||||
@@ -45,7 +45,7 @@ func (parser *PdfParser) repairLocateXref() (int64, error) {
|
||||
// Useful when the cross reference is pointing to an object with the wrong number.
|
||||
// Update the table.
|
||||
func (parser *PdfParser) rebuildXrefTable() error {
|
||||
newXrefs := XrefTable{}
|
||||
newXrefs := xRefTable{}
|
||||
for objNum, xref := range parser.xrefs {
|
||||
obj, _, err := parser.lookupByNumberWrapper(objNum, false)
|
||||
if err != nil {
|
||||
@@ -92,7 +92,7 @@ func parseObjectNumberFromString(str string) (int, int, error) {
|
||||
// Parse the entire file from top down.
|
||||
// Goes through the file byte-by-byte looking for "<num> <generation> obj" patterns.
|
||||
// N.B. This collects the xRefTableEntry data only.
|
||||
func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
|
||||
func (parser *PdfParser) repairRebuildXrefsTopDown() (*xRefTable, error) {
|
||||
if parser.repairsAttempted {
|
||||
// Avoid multiple repairs (only try once).
|
||||
return nil, fmt.Errorf("repair failed")
|
||||
@@ -107,7 +107,7 @@ func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
|
||||
bufLen := 20
|
||||
last := make([]byte, bufLen)
|
||||
|
||||
xrefTable := XrefTable{}
|
||||
xrefTable := xRefTable{}
|
||||
for {
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
@@ -164,8 +164,8 @@ func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
|
||||
// Create and insert the XREF entry if not existing, or the generation number is higher.
|
||||
if curXref, has := xrefTable[objNum]; !has || curXref.generation < genNum {
|
||||
// Make the entry for the cross ref table.
|
||||
xrefEntry := XrefObject{}
|
||||
xrefEntry.xtype = XREF_TABLE_ENTRY
|
||||
xrefEntry := xRefObject{}
|
||||
xrefEntry.xtype = xRefTableEntry
|
||||
xrefEntry.objectNumber = int(objNum)
|
||||
xrefEntry.generation = int(genNum)
|
||||
xrefEntry.offset = objOffset
|
||||
|
||||
@@ -49,7 +49,6 @@ func IsPrintable(char byte) bool {
|
||||
}
|
||||
|
||||
// IsDelimiter checks if a character represents a delimiter.
|
||||
// TODO (v3): Unexport.
|
||||
func IsDelimiter(char byte) bool {
|
||||
if char == '(' || char == ')' {
|
||||
return true
|
||||
|
||||
Reference in New Issue
Block a user