some improvements

This commit is contained in:
Adrian Zürcher
2026-01-01 11:00:23 +01:00
parent 8f313c00f0
commit ef0778c8b3
20 changed files with 305 additions and 385 deletions

View File

@@ -35,10 +35,10 @@ type PdfParser struct {
rs io.ReadSeeker
reader *bufio.Reader
fileSize int64
xrefs XrefTable
xrefs xRefTable
objstms ObjectStreams
trailer *PdfObjectDictionary
ObjCache ObjectCache // TODO: Unexport (v3).
ObjCache objectCache
crypter *PdfCrypt
repairsAttempted bool // Avoid multiple attempts for repair.
@@ -740,8 +740,8 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
// would be marked as free. But can still happen!
x, ok := parser.xrefs[curObjNum]
if !ok || gen > x.generation {
obj := XrefObject{objectNumber: curObjNum,
xtype: XREF_TABLE_ENTRY,
obj := xRefObject{objectNumber: curObjNum,
xtype: xRefTableEntry,
offset: first, generation: gen}
parser.xrefs[curObjNum] = obj
}
@@ -1000,16 +1000,16 @@ func (parser *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDict
if xr, ok := parser.xrefs[objNum]; !ok || int(n3) > xr.generation {
// Only overload if not already loaded!
// or has a newer generation number. (should not happen)
obj := XrefObject{objectNumber: objNum,
xtype: XREF_TABLE_ENTRY, offset: n2, generation: int(n3)}
obj := xRefObject{objectNumber: objNum,
xtype: xRefTableEntry, offset: n2, generation: int(n3)}
parser.xrefs[objNum] = obj
}
case 2:
// Object type 2: Compressed object.
common.Log.Trace("- In use - compressed object")
if _, ok := parser.xrefs[objNum]; !ok {
obj := XrefObject{objectNumber: objNum,
xtype: XREF_OBJECT_STREAM, osObjNumber: int(n2), osObjIndex: int(n3)}
obj := xRefObject{objectNumber: objNum,
xtype: xRefObjectStream, osObjNumber: int(n2), osObjIndex: int(n3)}
parser.xrefs[objNum] = obj
common.Log.Trace("entry: %s", parser.xrefs[objNum])
}
@@ -1128,7 +1128,7 @@ func (parser *PdfParser) seekToEOFMarker(fSize int64) error {
// The earlier xrefs have higher precedence. If objects already
// loaded will ignore older versions.
func (parser *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
parser.xrefs = make(XrefTable)
parser.xrefs = make(xRefTable)
parser.objstms = make(ObjectStreams)
// Get the file size.
@@ -1482,30 +1482,13 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) {
return &indirect, nil
}
// NewParserFromString returns a PdfParser whose seekable source and buffered
// reader are both backed by an in-memory byte copy of txt. The parser's file
// size is set to the byte length of txt. For testing purposes.
// TODO: Unexport (v3) or move to test files, if needed by external test cases.
func NewParserFromString(txt string) *PdfParser {
	src := bytes.NewReader([]byte(txt))
	return &PdfParser{
		rs:       src,
		reader:   bufio.NewReader(src),
		fileSize: int64(len(txt)),
	}
}
// NewParser creates a new parser for a PDF file via ReadSeeker. Loads the cross reference stream and trailer.
// An error is returned on failure.
func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
parser := &PdfParser{}
parser.rs = rs
parser.ObjCache = make(ObjectCache)
parser.ObjCache = make(objectCache)
parser.streamLengthReferenceLookupInProgress = map[int64]bool{}
// Start by reading the xrefs (from bottom).