fix wrong git ignore
This commit is contained in:
13
internal/pdf/core/const.go
Normal file
13
internal/pdf/core/const.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package core
|
||||
|
||||
import "errors"
|
||||
|
||||
// Sentinel errors reported by the stream encoding/decoding code.
var (
	// ErrUnsupportedEncodingParameters is reported when encoding or decoding is attempted with
	// parameters that are not supported, e.g. encoding with an unsupported Predictor (flate).
	ErrUnsupportedEncodingParameters = errors.New("unsupported encoding parameters")

	// ErrNoCCITTFaxDecode is reported because the CCITTFaxDecode encoding is not yet implemented.
	ErrNoCCITTFaxDecode = errors.New(" CCITTFaxDecode encoding is not yet implemented")

	// ErrNoJBIG2Decode is reported because the JBIG2Decode encoding is not yet implemented.
	ErrNoJBIG2Decode = errors.New(" JBIG2Decode encoding is not yet implemented")

	// ErrNoJPXDecode is reported because the JPXDecode encoding is not yet implemented.
	ErrNoJPXDecode = errors.New(" JPXDecode encoding is not yet implemented")
)
|
||||
372
internal/pdf/core/crossrefs.go
Normal file
372
internal/pdf/core/crossrefs.go
Normal file
@@ -0,0 +1,372 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// TODO (v3): Create a new type xrefType which can be an integer and can be used for improved type checking.
|
||||
// TODO (v3): Unexport these constants and rename with camelCase.
|
||||
// Kinds of cross reference entries, as stored in XrefObject.xtype.
const (
	// XREF_TABLE_ENTRY marks a normal xref table entry (value 0).
	XREF_TABLE_ENTRY = iota

	// XREF_OBJECT_STREAM marks an xref entry that lives in an xref object stream (value 1).
	XREF_OBJECT_STREAM
)
|
||||
|
||||
// XrefObject is one cross reference entry: it maps an object number (with its generation number)
// to where the object actually lives, which is either a file offset (xref table entry) or a
// position inside an object stream (xref object stream).
// TODO (v3): Unexport.
type XrefObject struct {
	// xtype is the kind of entry (XREF_TABLE_ENTRY or XREF_OBJECT_STREAM).
	xtype int

	// objectNumber and generation identify the object this entry describes.
	objectNumber, generation int

	// offset is the file offset of the object; used for normal xref table entries.
	offset int64

	// osObjNumber and osObjIndex locate the object for entries that point into an object stream.
	osObjNumber, osObjIndex int
}
|
||||
|
||||
// XrefTable is a map between object number and corresponding XrefObject.
|
||||
// TODO (v3): Unexport.
|
||||
// TODO: Consider changing to a slice, so can maintain the object order without sorting when analyzing.
|
||||
type XrefTable map[int]XrefObject
|
||||
|
||||
// ObjectStream holds the information of one object stream, which can contain multiple indirect
// objects: how many objects it has and where each of them is located.
// TODO (v3): Unexport.
type ObjectStream struct {
	// N is the number of objects in the stream.
	// TODO (v3): Unexport.
	N int

	// ds is the decoded stream data.
	ds []byte

	// offsets maps an object number to the object's offset within ds.
	offsets map[int]int64
}
|
||||
|
||||
// ObjectStreams defines a map between object numbers (object streams only) and underlying ObjectStream information.
|
||||
type ObjectStreams map[int]ObjectStream
|
||||
|
||||
// ObjectCache defines a map between object numbers and corresponding PdfObject. Serves as a cache for PdfObjects that
|
||||
// have already been parsed.
|
||||
// TODO (v3): Unexport.
|
||||
type ObjectCache map[int]PdfObject
|
||||
|
||||
// Get an object from an object stream.
|
||||
func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject, error) {
|
||||
var bufReader *bytes.Reader
|
||||
var objstm ObjectStream
|
||||
var cached bool
|
||||
|
||||
objstm, cached = parser.objstms[sobjNumber]
|
||||
if !cached {
|
||||
soi, err := parser.LookupByNumber(sobjNumber)
|
||||
if err != nil {
|
||||
common.Log.Debug("Missing object stream with number %d", sobjNumber)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
so, ok := soi.(*PdfObjectStream)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid object stream")
|
||||
}
|
||||
|
||||
if parser.crypter != nil && !parser.crypter.isDecrypted(so) {
|
||||
return nil, errors.New("need to decrypt the stream")
|
||||
}
|
||||
|
||||
sod := so.PdfObjectDictionary
|
||||
common.Log.Trace("so d: %s\n", *sod)
|
||||
name, ok := sod.Get("Type").(*PdfObjectName)
|
||||
if !ok {
|
||||
common.Log.Debug("error: Object stream should always have a Type")
|
||||
return nil, errors.New("object stream missing Type")
|
||||
}
|
||||
if strings.ToLower(string(*name)) != "objstm" {
|
||||
common.Log.Debug("error: Object stream type shall always be ObjStm !")
|
||||
return nil, errors.New("object stream type != ObjStm")
|
||||
}
|
||||
|
||||
N, ok := sod.Get("N").(*PdfObjectInteger)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid N in stream dictionary")
|
||||
}
|
||||
firstOffset, ok := sod.Get("First").(*PdfObjectInteger)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid First in stream dictionary")
|
||||
}
|
||||
|
||||
common.Log.Trace("type: %s number of objects: %d", name, *N)
|
||||
ds, err := DecodeStream(so)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
common.Log.Trace("Decoded: %s", ds)
|
||||
|
||||
// Temporarily change the reader object to this decoded buffer.
|
||||
// Change back afterwards.
|
||||
bakOffset := parser.GetFileOffset()
|
||||
defer func() { parser.SetFileOffset(bakOffset) }()
|
||||
|
||||
bufReader = bytes.NewReader(ds)
|
||||
parser.reader = bufio.NewReader(bufReader)
|
||||
|
||||
common.Log.Trace("Parsing offset map")
|
||||
// Load the offset map (relative to the beginning of the stream...)
|
||||
offsets := map[int]int64{}
|
||||
// Object list and offsets.
|
||||
for i := 0; i < int(*N); i++ {
|
||||
parser.skipSpaces()
|
||||
// Object number.
|
||||
obj, err := parser.parseNumber()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
onum, ok := obj.(*PdfObjectInteger)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid object stream offset table")
|
||||
}
|
||||
|
||||
parser.skipSpaces()
|
||||
// Offset.
|
||||
obj, err = parser.parseNumber()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset, ok := obj.(*PdfObjectInteger)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid object stream offset table")
|
||||
}
|
||||
|
||||
common.Log.Trace("obj %d offset %d", *onum, *offset)
|
||||
offsets[int(*onum)] = int64(*firstOffset + *offset)
|
||||
}
|
||||
|
||||
objstm = ObjectStream{N: int(*N), ds: ds, offsets: offsets}
|
||||
parser.objstms[sobjNumber] = objstm
|
||||
} else {
|
||||
// Temporarily change the reader object to this decoded buffer.
|
||||
// Point back afterwards.
|
||||
bakOffset := parser.GetFileOffset()
|
||||
defer func() { parser.SetFileOffset(bakOffset) }()
|
||||
|
||||
bufReader = bytes.NewReader(objstm.ds)
|
||||
// Temporarily change the reader object to this decoded buffer.
|
||||
parser.reader = bufio.NewReader(bufReader)
|
||||
}
|
||||
|
||||
offset := objstm.offsets[objNum]
|
||||
common.Log.Trace("ACTUAL offset[%d] = %d", objNum, offset)
|
||||
|
||||
bufReader.Seek(offset, os.SEEK_SET)
|
||||
parser.reader = bufio.NewReader(bufReader)
|
||||
|
||||
bb, _ := parser.reader.Peek(100)
|
||||
common.Log.Trace("OBJ peek \"%s\"", string(bb))
|
||||
|
||||
val, err := parser.parseObject()
|
||||
if err != nil {
|
||||
common.Log.Debug("error Fail to read object (%s)", err)
|
||||
return nil, err
|
||||
}
|
||||
if val == nil {
|
||||
return nil, errors.New("object cannot be null")
|
||||
}
|
||||
|
||||
// Make an indirect object around it.
|
||||
io := PdfIndirectObject{}
|
||||
io.ObjectNumber = int64(objNum)
|
||||
io.PdfObject = val
|
||||
|
||||
return &io, nil
|
||||
}
|
||||
|
||||
// LookupByNumber looks up a PdfObject by object number. Returns an error on failure.
|
||||
// TODO (v3): Unexport.
|
||||
func (parser *PdfParser) LookupByNumber(objNumber int) (PdfObject, error) {
|
||||
// Outside interface for lookupByNumberWrapper. Default attempts repairs of bad xref tables.
|
||||
obj, _, err := parser.lookupByNumberWrapper(objNumber, true)
|
||||
return obj, err
|
||||
}
|
||||
|
||||
// Wrapper for lookupByNumber, checks if object encrypted etc.
|
||||
func (parser *PdfParser) lookupByNumberWrapper(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
|
||||
obj, inObjStream, err := parser.lookupByNumber(objNumber, attemptRepairs)
|
||||
if err != nil {
|
||||
return nil, inObjStream, err
|
||||
}
|
||||
|
||||
// If encrypted, decrypt it prior to returning.
|
||||
// Do not attempt to decrypt objects within object streams.
|
||||
if !inObjStream && parser.crypter != nil && !parser.crypter.isDecrypted(obj) {
|
||||
err := parser.crypter.Decrypt(obj, 0, 0)
|
||||
if err != nil {
|
||||
return nil, inObjStream, err
|
||||
}
|
||||
}
|
||||
|
||||
return obj, inObjStream, nil
|
||||
}
|
||||
|
||||
func getObjectNumber(obj PdfObject) (int64, int64, error) {
|
||||
if io, isIndirect := obj.(*PdfIndirectObject); isIndirect {
|
||||
return io.ObjectNumber, io.GenerationNumber, nil
|
||||
}
|
||||
if so, isStream := obj.(*PdfObjectStream); isStream {
|
||||
return so.ObjectNumber, so.GenerationNumber, nil
|
||||
}
|
||||
return 0, 0, errors.New("not an indirect/stream object")
|
||||
}
|
||||
|
||||
// LookupByNumber
|
||||
// Repair signals whether to repair if broken.
|
||||
func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
|
||||
obj, ok := parser.ObjCache[objNumber]
|
||||
if ok {
|
||||
common.Log.Trace("Returning cached object %d", objNumber)
|
||||
return obj, false, nil
|
||||
}
|
||||
|
||||
xref, ok := parser.xrefs[objNumber]
|
||||
if !ok {
|
||||
// An indirect reference to an undefined object shall not be
|
||||
// considered an error by a conforming reader; it shall be
|
||||
// treated as a reference to the null object.
|
||||
common.Log.Trace("Unable to locate object in xrefs! - Returning null object")
|
||||
var nullObj PdfObjectNull
|
||||
return &nullObj, false, nil
|
||||
}
|
||||
|
||||
common.Log.Trace("Lookup obj number %d", objNumber)
|
||||
switch xref.xtype {
|
||||
case XREF_TABLE_ENTRY:
|
||||
common.Log.Trace("xrefobj obj num %d", xref.objectNumber)
|
||||
common.Log.Trace("xrefobj gen %d", xref.generation)
|
||||
common.Log.Trace("xrefobj offset %d", xref.offset)
|
||||
|
||||
parser.rs.Seek(xref.offset, os.SEEK_SET)
|
||||
parser.reader = bufio.NewReader(parser.rs)
|
||||
|
||||
obj, err := parser.ParseIndirectObject()
|
||||
if err != nil {
|
||||
common.Log.Debug("error Failed reading xref (%s)", err)
|
||||
// Offset pointing to a non-object. Try to repair the file.
|
||||
if attemptRepairs {
|
||||
common.Log.Debug("Attempting to repair xrefs (top down)")
|
||||
xrefTable, err := parser.repairRebuildXrefsTopDown()
|
||||
if err != nil {
|
||||
common.Log.Debug("error Failed repair (%s)", err)
|
||||
return nil, false, err
|
||||
}
|
||||
parser.xrefs = *xrefTable
|
||||
return parser.lookupByNumber(objNumber, false)
|
||||
}
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
if attemptRepairs {
|
||||
// Check the object number..
|
||||
// If it does not match, then try to rebuild, i.e. loop through
|
||||
// all the items in the xref and look each one up and correct.
|
||||
realObjNum, _, _ := getObjectNumber(obj)
|
||||
if int(realObjNum) != objNumber {
|
||||
common.Log.Debug("invalid xrefs: Rebuilding")
|
||||
err := parser.rebuildXrefTable()
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
// Empty the cache.
|
||||
parser.ObjCache = ObjectCache{}
|
||||
// Try looking up again and return.
|
||||
return parser.lookupByNumberWrapper(objNumber, false)
|
||||
}
|
||||
}
|
||||
|
||||
common.Log.Trace("Returning obj")
|
||||
parser.ObjCache[objNumber] = obj
|
||||
return obj, false, nil
|
||||
case XREF_OBJECT_STREAM:
|
||||
common.Log.Trace("xref from object stream!")
|
||||
common.Log.Trace(">Load via OS!")
|
||||
common.Log.Trace("Object stream available in object %d/%d", xref.osObjNumber, xref.osObjIndex)
|
||||
|
||||
if xref.osObjNumber == objNumber {
|
||||
common.Log.Debug("error Circular reference!?!")
|
||||
return nil, true, errors.New(" Xref circular reference")
|
||||
}
|
||||
_, exists := parser.xrefs[xref.osObjNumber]
|
||||
if exists {
|
||||
optr, err := parser.lookupObjectViaOS(xref.osObjNumber, objNumber) //xref.osObjIndex)
|
||||
if err != nil {
|
||||
common.Log.Debug("error Returning ERR (%s)", err)
|
||||
return nil, true, err
|
||||
}
|
||||
common.Log.Trace("<Loaded via OS")
|
||||
parser.ObjCache[objNumber] = optr
|
||||
if parser.crypter != nil {
|
||||
// Mark as decrypted (inside object stream) for caching.
|
||||
// and avoid decrypting decrypted object.
|
||||
parser.crypter.DecryptedObjects[optr] = true
|
||||
}
|
||||
return optr, true, nil
|
||||
} else {
|
||||
common.Log.Debug("?? Belongs to a non-cross referenced object ...!")
|
||||
return nil, true, errors.New("OS belongs to a non cross referenced object")
|
||||
}
|
||||
}
|
||||
return nil, false, errors.New("unknown xref type")
|
||||
}
|
||||
|
||||
// LookupByReference looks up a PdfObject by a reference.
|
||||
func (parser *PdfParser) LookupByReference(ref PdfObjectReference) (PdfObject, error) {
|
||||
common.Log.Trace("Looking up reference %s", ref.String())
|
||||
return parser.LookupByNumber(int(ref.ObjectNumber))
|
||||
}
|
||||
|
||||
// Trace traces a PdfObject to direct object, looking up and resolving references as needed (unlike TraceToDirect).
|
||||
// TODO (v3): Unexport.
|
||||
func (parser *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
|
||||
ref, isRef := obj.(*PdfObjectReference)
|
||||
if !isRef {
|
||||
// Direct object already.
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
bakOffset := parser.GetFileOffset()
|
||||
defer func() { parser.SetFileOffset(bakOffset) }()
|
||||
|
||||
o, err := parser.LookupByReference(*ref)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
io, isInd := o.(*PdfIndirectObject)
|
||||
if !isInd {
|
||||
// Not indirect (Stream or null object).
|
||||
return o, nil
|
||||
}
|
||||
o = io.PdfObject
|
||||
_, isRef = o.(*PdfObjectReference)
|
||||
if isRef {
|
||||
return io, errors.New("multi depth trace pointer to pointer")
|
||||
}
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
func printXrefTable(xrefTable XrefTable) {
|
||||
common.Log.Debug("=X=X=X=")
|
||||
common.Log.Debug("Xref table:")
|
||||
i := 0
|
||||
for _, xref := range xrefTable {
|
||||
common.Log.Debug("i+1: %d (obj num: %d gen: %d) -> %d", i+1, xref.objectNumber, xref.generation, xref.offset)
|
||||
i++
|
||||
}
|
||||
}
|
||||
1732
internal/pdf/core/crypt.go
Normal file
1732
internal/pdf/core/crypt.go
Normal file
File diff suppressed because it is too large
Load Diff
265
internal/pdf/core/crypt_filters.go
Normal file
265
internal/pdf/core/crypt_filters.go
Normal file
@@ -0,0 +1,265 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/md5"
|
||||
"crypto/rand"
|
||||
"crypto/rc4"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
var (
|
||||
cryptMethods = make(map[string]cryptFilterMethod)
|
||||
)
|
||||
|
||||
// registerCryptFilterMethod registers a CFM.
|
||||
func registerCryptFilterMethod(m cryptFilterMethod) {
|
||||
cryptMethods[m.CFM()] = m
|
||||
}
|
||||
|
||||
// getCryptFilterMethod check if a CFM with a specified name is supported an returns its implementation.
|
||||
func getCryptFilterMethod(name string) (cryptFilterMethod, error) {
|
||||
f := cryptMethods[name]
|
||||
if f == nil {
|
||||
return nil, fmt.Errorf("unsupported crypt filter: %q", name)
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
// register supported crypt filter methods
|
||||
registerCryptFilterMethod(cryptFilterV2{})
|
||||
registerCryptFilterMethod(cryptFilterAESV2{})
|
||||
registerCryptFilterMethod(cryptFilterAESV3{})
|
||||
}
|
||||
|
||||
// cryptFilterMethod is the interface shared by all crypt filter methods.
type cryptFilterMethod interface {
	// CFM returns the filter name to be used in the CFM field of the Encrypt dictionary.
	CFM() string

	// MakeKey derives an object encryption key from the file encryption key and the object and
	// generation numbers. Only legacy filters derive per-object keys; AESV3 does not change the
	// key for each object.
	MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error)

	// EncryptBytes encrypts p with the object encryption key returned by MakeKey.
	// An implementation may reuse the buffer and encrypt the data in-place.
	EncryptBytes(p []byte, okey []byte) ([]byte, error)

	// DecryptBytes decrypts p with the object encryption key returned by MakeKey.
	// An implementation may reuse the buffer and decrypt the data in-place.
	DecryptBytes(p []byte, okey []byte) ([]byte, error)
}
|
||||
|
||||
// makeKeyV2 is the object key derivation shared by the V2 and AESV2 crypt filters.
//
// The key material is ekey followed by the low 3 bytes of objNum and the low 2 bytes of genNum
// (least significant byte first) and, when isAES is set, the 4 bytes "sAlT". The result is the
// MD5 digest of that material, truncated to len(ekey)+5 bytes when that is less than 16.
// ekey itself is not modified. The returned error is always nil.
func makeKeyV2(objNum, genNum uint32, ekey []byte, isAES bool) ([]byte, error) {
	const suffixLen = 5 // 3 object number bytes + 2 generation number bytes
	keyLen := len(ekey) + suffixLen

	material := make([]byte, 0, keyLen+4)
	material = append(material, ekey...)
	material = append(material,
		byte(objNum), byte(objNum>>8), byte(objNum>>16),
		byte(genNum), byte(genNum>>8),
	)
	if isAES {
		// If using the AES algorithm, extend the encryption key an additional 4 bytes by adding
		// the value "sAlT", which corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54.
		material = append(material, "sAlT"...)
	}

	// Take the MD5.
	digest := md5.Sum(material)
	if keyLen < md5.Size {
		return digest[:keyLen], nil
	}
	return digest[:], nil
}
|
||||
|
||||
// cryptFilterV2 is a RC4-based filter
|
||||
type cryptFilterV2 struct{}
|
||||
|
||||
func (cryptFilterV2) CFM() string {
|
||||
return CryptFilterV2
|
||||
}
|
||||
|
||||
func (f cryptFilterV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
|
||||
return makeKeyV2(objNum, genNum, ekey, false)
|
||||
}
|
||||
|
||||
func (cryptFilterV2) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
|
||||
// Standard RC4 algorithm.
|
||||
ciph, err := rc4.NewCipher(okey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
common.Log.Trace("RC4 Encrypt: % x", buf)
|
||||
ciph.XORKeyStream(buf, buf)
|
||||
common.Log.Trace("to: % x", buf)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func (cryptFilterV2) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
|
||||
// Standard RC4 algorithm.
|
||||
ciph, err := rc4.NewCipher(okey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
common.Log.Trace("RC4 Decrypt: % x", buf)
|
||||
ciph.XORKeyStream(buf, buf)
|
||||
common.Log.Trace("to: % x", buf)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// cryptFilterAES provides the generic AES encryption and decryption shared by the AESV2 and
// AESV3 filter methods, which embed it.
type cryptFilterAES struct{}
|
||||
|
||||
func (cryptFilterAES) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
|
||||
// Strings and streams encrypted with AES shall use a padding
|
||||
// scheme that is described in Internet RFC 2898, PKCS #5:
|
||||
// Password-Based Cryptography Specification Version 2.0; see
|
||||
// the Bibliography. For an original message length of M,
|
||||
// the pad shall consist of 16 - (M mod 16) bytes whose value
|
||||
// shall also be 16 - (M mod 16).
|
||||
//
|
||||
// A 9-byte message has a pad of 7 bytes, each with the value
|
||||
// 0x07. The pad can be unambiguously removed to determine the
|
||||
// original message length when decrypting. Note that the pad is
|
||||
// present when M is evenly divisible by 16; it contains 16 bytes
|
||||
// of 0x10.
|
||||
|
||||
ciph, err := aes.NewCipher(okey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
common.Log.Trace("AES Encrypt (%d): % x", len(buf), buf)
|
||||
|
||||
// If using the AES algorithm, the Cipher Block Chaining (CBC)
|
||||
// mode, which requires an initialization vector, is used. The
|
||||
// block size parameter is set to 16 bytes, and the initialization
|
||||
// vector is a 16-byte random number that is stored as the first
|
||||
// 16 bytes of the encrypted stream or string.
|
||||
|
||||
const block = aes.BlockSize // 16
|
||||
|
||||
pad := block - len(buf)%block
|
||||
for i := 0; i < pad; i++ {
|
||||
buf = append(buf, byte(pad))
|
||||
}
|
||||
common.Log.Trace("Padded to %d bytes", len(buf))
|
||||
|
||||
// Generate random 16 bytes, place in beginning of buffer.
|
||||
ciphertext := make([]byte, block+len(buf))
|
||||
iv := ciphertext[:block]
|
||||
if _, err := io.ReadFull(rand.Reader, iv); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mode := cipher.NewCBCEncrypter(ciph, iv)
|
||||
mode.CryptBlocks(ciphertext[block:], buf)
|
||||
|
||||
buf = ciphertext
|
||||
common.Log.Trace("to (%d): % x", len(buf), buf)
|
||||
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func (cryptFilterAES) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
|
||||
// Strings and streams encrypted with AES shall use a padding
|
||||
// scheme that is described in Internet RFC 2898, PKCS #5:
|
||||
// Password-Based Cryptography Specification Version 2.0; see
|
||||
// the Bibliography. For an original message length of M,
|
||||
// the pad shall consist of 16 - (M mod 16) bytes whose value
|
||||
// shall also be 16 - (M mod 16).
|
||||
//
|
||||
// A 9-byte message has a pad of 7 bytes, each with the value
|
||||
// 0x07. The pad can be unambiguously removed to determine the
|
||||
// original message length when decrypting. Note that the pad is
|
||||
// present when M is evenly divisible by 16; it contains 16 bytes
|
||||
// of 0x10.
|
||||
|
||||
ciph, err := aes.NewCipher(okey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If using the AES algorithm, the Cipher Block Chaining (CBC)
|
||||
// mode, which requires an initialization vector, is used. The
|
||||
// block size parameter is set to 16 bytes, and the initialization
|
||||
// vector is a 16-byte random number that is stored as the first
|
||||
// 16 bytes of the encrypted stream or string.
|
||||
if len(buf) < 16 {
|
||||
common.Log.Debug("error AES invalid buf %s", buf)
|
||||
return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf))
|
||||
}
|
||||
|
||||
iv := buf[:16]
|
||||
buf = buf[16:]
|
||||
|
||||
if len(buf)%16 != 0 {
|
||||
common.Log.Debug(" iv (%d): % x", len(iv), iv)
|
||||
common.Log.Debug("buf (%d): % x", len(buf), buf)
|
||||
return buf, fmt.Errorf("AES buf length not multiple of 16 (%d)", len(buf))
|
||||
}
|
||||
|
||||
mode := cipher.NewCBCDecrypter(ciph, iv)
|
||||
|
||||
common.Log.Trace("AES Decrypt (%d): % x", len(buf), buf)
|
||||
common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf)
|
||||
mode.CryptBlocks(buf, buf)
|
||||
common.Log.Trace("to (%d): % x", len(buf), buf)
|
||||
|
||||
if len(buf) == 0 {
|
||||
common.Log.Trace("Empty buf, returning empty string")
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// The padded length is indicated by the last values. Remove those.
|
||||
|
||||
padLen := int(buf[len(buf)-1])
|
||||
if padLen >= len(buf) {
|
||||
common.Log.Debug("Illegal pad length")
|
||||
return buf, fmt.Errorf("invalid pad length")
|
||||
}
|
||||
buf = buf[:len(buf)-padLen]
|
||||
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// cryptFilterAESV2 is an AES-based filter (128 bit key, PDF 1.6)
|
||||
type cryptFilterAESV2 struct {
|
||||
cryptFilterAES
|
||||
}
|
||||
|
||||
func (cryptFilterAESV2) CFM() string {
|
||||
return CryptFilterAESV2
|
||||
}
|
||||
|
||||
func (cryptFilterAESV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
|
||||
return makeKeyV2(objNum, genNum, ekey, true)
|
||||
}
|
||||
|
||||
// cryptFilterAESV3 is an AES-based filter (256 bit key, PDF 2.0)
|
||||
type cryptFilterAESV3 struct {
|
||||
cryptFilterAES
|
||||
}
|
||||
|
||||
func (cryptFilterAESV3) CFM() string {
|
||||
return CryptFilterAESV3
|
||||
}
|
||||
|
||||
func (cryptFilterAESV3) MakeKey(_, _ uint32, ekey []byte) ([]byte, error) {
|
||||
return ekey, nil
|
||||
}
|
||||
61
internal/pdf/core/ecb.go
Normal file
61
internal/pdf/core/ecb.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package core
|
||||
|
||||
import "crypto/cipher"
|
||||
|
||||
// ecb implements the Electronic Codebook encryption mode: every block is processed
// independently with the underlying block cipher.
// This mode is used to compute or validate document permissions for R=6.
type ecb struct {
	b         cipher.Block
	blockSize int
}

// newECB wraps b, remembering its block size.
func newECB(b cipher.Block) *ecb {
	return &ecb{b: b, blockSize: b.BlockSize()}
}

// cryptBlocks runs apply on each full block of src, writing the results to the matching position
// in dst. It panics when src is not a whole number of blocks or when dst is shorter than src.
func (x *ecb) cryptBlocks(dst, src []byte, apply func(dst, src []byte)) {
	if len(src)%x.blockSize != 0 {
		panic("crypto/cipher: input not full blocks")
	}
	if len(dst) < len(src) {
		panic("crypto/cipher: output smaller than input")
	}
	for off := 0; off < len(src); off += x.blockSize {
		apply(dst[off:], src[off:off+x.blockSize])
	}
}

// ecbEncrypter is the encrypting cipher.BlockMode for ECB.
type ecbEncrypter ecb

// newECBEncrypter returns a cipher.BlockMode that encrypts in ECB mode using b.
func newECBEncrypter(b cipher.Block) cipher.BlockMode {
	return (*ecbEncrypter)(newECB(b))
}

// BlockSize returns the block size of the underlying cipher.
func (x *ecbEncrypter) BlockSize() int { return x.blockSize }

// CryptBlocks encrypts the blocks of src into dst. It panics when src is not a whole number of
// blocks or when dst is shorter than src.
func (x *ecbEncrypter) CryptBlocks(dst, src []byte) {
	(*ecb)(x).cryptBlocks(dst, src, x.b.Encrypt)
}

// ecbDecrypter is the decrypting cipher.BlockMode for ECB.
type ecbDecrypter ecb

// newECBDecrypter returns a cipher.BlockMode that decrypts in ECB mode using b.
func newECBDecrypter(b cipher.Block) cipher.BlockMode {
	return (*ecbDecrypter)(newECB(b))
}

// BlockSize returns the block size of the underlying cipher.
func (x *ecbDecrypter) BlockSize() int { return x.blockSize }

// CryptBlocks decrypts the blocks of src into dst. It panics when src is not a whole number of
// blocks or when dst is shorter than src.
func (x *ecbDecrypter) CryptBlocks(dst, src []byte) {
	(*ecb)(x).cryptBlocks(dst, src, x.b.Decrypt)
}
|
||||
1846
internal/pdf/core/encoding.go
Normal file
1846
internal/pdf/core/encoding.go
Normal file
File diff suppressed because it is too large
Load Diff
44
internal/pdf/core/io.go
Normal file
44
internal/pdf/core/io.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"os"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// ReadAtLeast reads at least n bytes into slice p.
|
||||
// Returns the number of bytes read (should always be == n), and an error on failure.
|
||||
// TODO (v3): Unexport.
|
||||
func (parser *PdfParser) ReadAtLeast(p []byte, n int) (int, error) {
|
||||
remaining := n
|
||||
start := 0
|
||||
numRounds := 0
|
||||
for remaining > 0 {
|
||||
nRead, err := parser.reader.Read(p[start:])
|
||||
if err != nil {
|
||||
common.Log.Debug("error Failed reading (%d;%d) %s", nRead, numRounds, err.Error())
|
||||
return start, errors.New("failed reading")
|
||||
}
|
||||
numRounds++
|
||||
start += nRead
|
||||
remaining -= nRead
|
||||
}
|
||||
return start, nil
|
||||
}
|
||||
|
||||
// Get the current file offset, accounting for buffered position.
|
||||
// TODO (v3): Unexport.
|
||||
func (parser *PdfParser) GetFileOffset() int64 {
|
||||
offset, _ := parser.rs.Seek(0, os.SEEK_CUR)
|
||||
offset -= int64(parser.reader.Buffered())
|
||||
return offset
|
||||
}
|
||||
|
||||
// Seek the file to an offset position.
|
||||
// TODO (v3): Unexport.
|
||||
func (parser *PdfParser) SetFileOffset(offset int64) {
|
||||
parser.rs.Seek(offset, os.SEEK_SET)
|
||||
parser.reader = bufio.NewReader(parser.rs)
|
||||
}
|
||||
1644
internal/pdf/core/parser.go
Normal file
1644
internal/pdf/core/parser.go
Normal file
File diff suppressed because it is too large
Load Diff
570
internal/pdf/core/primitives.go
Normal file
570
internal/pdf/core/primitives.go
Normal file
@@ -0,0 +1,570 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// PdfObject is the interface that every primitive PDF object must implement.
type PdfObject interface {
	// String returns a string representation of the primitive (for debugging).
	String() string

	// DefaultWriteString returns the PDF primitive as written to file, as expected by the
	// standard.
	DefaultWriteString() string
}
|
||||
|
||||
// PdfObjectBool is the primitive PDF boolean object.
type PdfObjectBool bool
|
||||
|
||||
// PdfObjectInteger is the primitive PDF integer numerical object, stored as an int64.
type PdfObjectInteger int64
|
||||
|
||||
// PdfObjectFloat is the primitive PDF floating point numerical object, stored as a float64.
type PdfObjectFloat float64
|
||||
|
||||
// PdfObjectString is the primitive PDF string object.
// TODO (v3): Change to a struct and add a flag for hex/plaintext.
type PdfObjectString string
|
||||
|
||||
// PdfObjectName is the primitive PDF name object.
type PdfObjectName string
|
||||
|
||||
// PdfObjectArray represents the primitive PDF array object.
|
||||
type PdfObjectArray []PdfObject
|
||||
|
||||
// PdfObjectDictionary represents the primitive PDF dictionary/map object.
|
||||
type PdfObjectDictionary struct {
|
||||
dict map[PdfObjectName]PdfObject
|
||||
keys []PdfObjectName
|
||||
}
|
||||
|
||||
// PdfObjectNull is the primitive PDF null object. It carries no data.
type PdfObjectNull struct{}
|
||||
|
||||
// PdfObjectReference is the primitive PDF reference object, identifying an object by its object
// number and generation number.
type PdfObjectReference struct {
	// ObjectNumber is the number of the referenced object.
	ObjectNumber int64

	// GenerationNumber is the generation number of the referenced object.
	GenerationNumber int64
}
|
||||
|
||||
// PdfIndirectObject represents the primitive PDF indirect object.
|
||||
type PdfIndirectObject struct {
|
||||
PdfObjectReference
|
||||
PdfObject
|
||||
}
|
||||
|
||||
// PdfObjectStream represents the primitive PDF Object stream.
|
||||
type PdfObjectStream struct {
|
||||
PdfObjectReference
|
||||
*PdfObjectDictionary
|
||||
Stream []byte
|
||||
}
|
||||
|
||||
// MakeDict creates and returns an empty PdfObjectDictionary.
|
||||
func MakeDict() *PdfObjectDictionary {
|
||||
d := &PdfObjectDictionary{}
|
||||
d.dict = map[PdfObjectName]PdfObject{}
|
||||
d.keys = []PdfObjectName{}
|
||||
return d
|
||||
}
|
||||
|
||||
// MakeName creates a PdfObjectName from a string.
|
||||
func MakeName(s string) *PdfObjectName {
|
||||
name := PdfObjectName(s)
|
||||
return &name
|
||||
}
|
||||
|
||||
// MakeInteger creates a PdfObjectInteger from an int64.
|
||||
func MakeInteger(val int64) *PdfObjectInteger {
|
||||
num := PdfObjectInteger(val)
|
||||
return &num
|
||||
}
|
||||
|
||||
// MakeArray creates an PdfObjectArray from a list of PdfObjects.
|
||||
func MakeArray(objects ...PdfObject) *PdfObjectArray {
|
||||
array := PdfObjectArray{}
|
||||
for _, obj := range objects {
|
||||
array = append(array, obj)
|
||||
}
|
||||
return &array
|
||||
}
|
||||
|
||||
// MakeArrayFromIntegers creates an PdfObjectArray from a slice of ints, where each array element is
|
||||
// an PdfObjectInteger.
|
||||
func MakeArrayFromIntegers(vals []int) *PdfObjectArray {
|
||||
array := PdfObjectArray{}
|
||||
for _, val := range vals {
|
||||
array = append(array, MakeInteger(int64(val)))
|
||||
}
|
||||
return &array
|
||||
}
|
||||
|
||||
// MakeArrayFromIntegers64 creates an PdfObjectArray from a slice of int64s, where each array element
|
||||
// is an PdfObjectInteger.
|
||||
func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray {
|
||||
array := PdfObjectArray{}
|
||||
for _, val := range vals {
|
||||
array = append(array, MakeInteger(val))
|
||||
}
|
||||
return &array
|
||||
}
|
||||
|
||||
// MakeArrayFromFloats creates an PdfObjectArray from a slice of float64s, where each array element is an
|
||||
// PdfObjectFloat.
|
||||
func MakeArrayFromFloats(vals []float64) *PdfObjectArray {
|
||||
array := PdfObjectArray{}
|
||||
for _, val := range vals {
|
||||
array = append(array, MakeFloat(val))
|
||||
}
|
||||
return &array
|
||||
}
|
||||
|
||||
// MakeBool creates an PdfObjectBool from a bool.
|
||||
func MakeBool(val bool) *PdfObjectBool {
|
||||
v := PdfObjectBool(val)
|
||||
return &v
|
||||
}
|
||||
|
||||
// MakeFloat creates an PdfObjectFloat from a float64.
|
||||
func MakeFloat(val float64) *PdfObjectFloat {
|
||||
num := PdfObjectFloat(val)
|
||||
return &num
|
||||
}
|
||||
|
||||
// MakeString creates an PdfObjectString from a string.
|
||||
func MakeString(s string) *PdfObjectString {
|
||||
str := PdfObjectString(s)
|
||||
return &str
|
||||
}
|
||||
|
||||
// MakeNull creates an PdfObjectNull.
|
||||
func MakeNull() *PdfObjectNull {
|
||||
null := PdfObjectNull{}
|
||||
return &null
|
||||
}
|
||||
|
||||
// MakeIndirectObject creates an PdfIndirectObject with a specified direct object PdfObject.
|
||||
func MakeIndirectObject(obj PdfObject) *PdfIndirectObject {
|
||||
ind := &PdfIndirectObject{}
|
||||
ind.PdfObject = obj
|
||||
return ind
|
||||
}
|
||||
|
||||
// MakeStream creates an PdfObjectStream with specified contents and encoding. If encoding is nil, then raw encoding
|
||||
// will be used (i.e. no encoding applied).
|
||||
func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error) {
|
||||
stream := &PdfObjectStream{}
|
||||
|
||||
if encoder == nil {
|
||||
encoder = NewRawEncoder()
|
||||
}
|
||||
|
||||
stream.PdfObjectDictionary = encoder.MakeStreamDict()
|
||||
|
||||
encoded, err := encoder.EncodeBytes(contents)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stream.PdfObjectDictionary.Set("Length", MakeInteger(int64(len(encoded))))
|
||||
|
||||
stream.Stream = encoded
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
func (bool *PdfObjectBool) String() string {
|
||||
if *bool {
|
||||
return "true"
|
||||
} else {
|
||||
return "false"
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (bool *PdfObjectBool) DefaultWriteString() string {
|
||||
if *bool {
|
||||
return "true"
|
||||
} else {
|
||||
return "false"
|
||||
}
|
||||
}
|
||||
|
||||
func (int *PdfObjectInteger) String() string {
|
||||
return fmt.Sprintf("%d", *int)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (int *PdfObjectInteger) DefaultWriteString() string {
|
||||
return fmt.Sprintf("%d", *int)
|
||||
}
|
||||
|
||||
func (float *PdfObjectFloat) String() string {
|
||||
return fmt.Sprintf("%f", *float)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (float *PdfObjectFloat) DefaultWriteString() string {
|
||||
return fmt.Sprintf("%f", *float)
|
||||
}
|
||||
|
||||
func (str *PdfObjectString) String() string {
|
||||
return string(*str)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (str *PdfObjectString) DefaultWriteString() string {
|
||||
var output bytes.Buffer
|
||||
|
||||
escapeSequences := map[byte]string{
|
||||
'\n': "\\n",
|
||||
'\r': "\\r",
|
||||
'\t': "\\t",
|
||||
'\b': "\\b",
|
||||
'\f': "\\f",
|
||||
'(': "\\(",
|
||||
')': "\\)",
|
||||
'\\': "\\\\",
|
||||
}
|
||||
|
||||
output.WriteString("(")
|
||||
for i := 0; i < len(*str); i++ {
|
||||
char := (*str)[i]
|
||||
if escStr, useEsc := escapeSequences[char]; useEsc {
|
||||
output.WriteString(escStr)
|
||||
} else {
|
||||
output.WriteByte(char)
|
||||
}
|
||||
}
|
||||
output.WriteString(")")
|
||||
|
||||
return output.String()
|
||||
}
|
||||
|
||||
func (name *PdfObjectName) String() string {
|
||||
return string(*name)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (name *PdfObjectName) DefaultWriteString() string {
|
||||
var output bytes.Buffer
|
||||
|
||||
if len(*name) > 127 {
|
||||
common.Log.Debug("error: Name too long (%s)", *name)
|
||||
}
|
||||
|
||||
output.WriteString("/")
|
||||
for i := 0; i < len(*name); i++ {
|
||||
char := (*name)[i]
|
||||
if !IsPrintable(char) || char == '#' || IsDelimiter(char) {
|
||||
output.WriteString(fmt.Sprintf("#%.2x", char))
|
||||
} else {
|
||||
output.WriteByte(char)
|
||||
}
|
||||
}
|
||||
|
||||
return output.String()
|
||||
}
|
||||
|
||||
// ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is returned if the array
|
||||
// contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat).
|
||||
func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) {
|
||||
vals := []float64{}
|
||||
|
||||
for _, obj := range *array {
|
||||
if number, is := obj.(*PdfObjectInteger); is {
|
||||
vals = append(vals, float64(*number))
|
||||
} else if number, is := obj.(*PdfObjectFloat); is {
|
||||
vals = append(vals, float64(*number))
|
||||
} else {
|
||||
return nil, fmt.Errorf("type error")
|
||||
}
|
||||
}
|
||||
|
||||
return vals, nil
|
||||
}
|
||||
|
||||
// ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains
|
||||
// non-integer objects. Each element can only be PdfObjectInteger.
|
||||
func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
|
||||
vals := []int{}
|
||||
|
||||
for _, obj := range *array {
|
||||
if number, is := obj.(*PdfObjectInteger); is {
|
||||
vals = append(vals, int(*number))
|
||||
} else {
|
||||
return nil, fmt.Errorf("type error")
|
||||
}
|
||||
}
|
||||
|
||||
return vals, nil
|
||||
}
|
||||
|
||||
func (array *PdfObjectArray) String() string {
|
||||
outStr := "["
|
||||
for ind, o := range *array {
|
||||
outStr += o.String()
|
||||
if ind < (len(*array) - 1) {
|
||||
outStr += ", "
|
||||
}
|
||||
}
|
||||
outStr += "]"
|
||||
return outStr
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (array *PdfObjectArray) DefaultWriteString() string {
|
||||
outStr := "["
|
||||
for ind, o := range *array {
|
||||
outStr += o.DefaultWriteString()
|
||||
if ind < (len(*array) - 1) {
|
||||
outStr += " "
|
||||
}
|
||||
}
|
||||
outStr += "]"
|
||||
return outStr
|
||||
}
|
||||
|
||||
// Append adds an PdfObject to the array.
|
||||
func (array *PdfObjectArray) Append(obj PdfObject) {
|
||||
*array = append(*array, obj)
|
||||
}
|
||||
|
||||
func getNumberAsFloat(obj PdfObject) (float64, error) {
|
||||
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||||
return float64(*fObj), nil
|
||||
}
|
||||
|
||||
if iObj, ok := obj.(*PdfObjectInteger); ok {
|
||||
return float64(*iObj), nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("not a number")
|
||||
}
|
||||
|
||||
// GetAsFloat64Slice returns the array as []float64 slice.
|
||||
// Returns an error if not entirely numeric (only PdfObjectIntegers, PdfObjectFloats).
|
||||
func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
|
||||
slice := []float64{}
|
||||
|
||||
for _, obj := range *array {
|
||||
obj := TraceToDirectObject(obj)
|
||||
number, err := getNumberAsFloat(obj)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("array element not a number")
|
||||
}
|
||||
slice = append(slice, number)
|
||||
}
|
||||
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
// Merge merges in key/values from another dictionary. Overwriting if has same keys.
|
||||
func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) {
|
||||
if another != nil {
|
||||
for _, key := range another.Keys() {
|
||||
val := another.Get(key)
|
||||
d.Set(key, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *PdfObjectDictionary) String() string {
|
||||
outStr := "Dict("
|
||||
for _, k := range d.keys {
|
||||
v := d.dict[k]
|
||||
outStr += fmt.Sprintf("\"%s\": %s, ", k, v.String())
|
||||
}
|
||||
outStr += ")"
|
||||
return outStr
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (d *PdfObjectDictionary) DefaultWriteString() string {
|
||||
outStr := "<<"
|
||||
for _, k := range d.keys {
|
||||
v := d.dict[k]
|
||||
common.Log.Trace("Writing k: %s %T %v %v", k, v, k, v)
|
||||
outStr += k.DefaultWriteString()
|
||||
outStr += " "
|
||||
outStr += v.DefaultWriteString()
|
||||
}
|
||||
outStr += ">>"
|
||||
return outStr
|
||||
}
|
||||
|
||||
// Set sets the dictionary's key -> val mapping entry. Overwrites if key already set.
|
||||
func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) {
|
||||
found := false
|
||||
for _, k := range d.keys {
|
||||
if k == key {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
d.keys = append(d.keys, key)
|
||||
}
|
||||
|
||||
d.dict[key] = val
|
||||
}
|
||||
|
||||
// Get returns the PdfObject corresponding to the specified key.
|
||||
// Returns a nil value if the key is not set.
|
||||
//
|
||||
// The design is such that we only return 1 value.
|
||||
// The reason is that, it will be easy to do type casts such as
|
||||
// name, ok := dict.Get("mykey").(*PdfObjectName)
|
||||
// if !ok ....
|
||||
func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject {
|
||||
val, has := d.dict[key]
|
||||
if !has {
|
||||
return nil
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
// Keys returns the list of keys in the dictionary.
|
||||
func (d *PdfObjectDictionary) Keys() []PdfObjectName {
|
||||
return d.keys
|
||||
}
|
||||
|
||||
// Remove removes an element specified by key.
|
||||
func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
|
||||
idx := -1
|
||||
for i, k := range d.keys {
|
||||
if k == key {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if idx >= 0 {
|
||||
// Found. Remove from key list and map.
|
||||
d.keys = append(d.keys[:idx], d.keys[idx+1:]...)
|
||||
delete(d.dict, key)
|
||||
}
|
||||
}
|
||||
|
||||
// SetIfNotNil sets the dictionary's key -> val mapping entry -IF- val is not nil.
|
||||
// Note that we take care to perform a type switch. Otherwise if we would supply a nil value
|
||||
// of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus
|
||||
// would get set.
|
||||
func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
|
||||
if val != nil {
|
||||
switch t := val.(type) {
|
||||
case *PdfObjectName:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectDictionary:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectStream:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectString:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectNull:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectInteger:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectArray:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectBool:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectFloat:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfObjectReference:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
case *PdfIndirectObject:
|
||||
if t != nil {
|
||||
d.Set(key, val)
|
||||
}
|
||||
default:
|
||||
common.Log.Error("error: Unknown type: %T - should never happen!", val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ref *PdfObjectReference) String() string {
|
||||
return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (ref *PdfObjectReference) DefaultWriteString() string {
|
||||
return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber)
|
||||
}
|
||||
|
||||
func (ind *PdfIndirectObject) String() string {
|
||||
// Avoid printing out the object, can cause problems with circular
|
||||
// references.
|
||||
return fmt.Sprintf("IObject:%d", (*ind).ObjectNumber)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (ind *PdfIndirectObject) DefaultWriteString() string {
|
||||
outStr := fmt.Sprintf("%d 0 R", (*ind).ObjectNumber)
|
||||
return outStr
|
||||
}
|
||||
|
||||
func (stream *PdfObjectStream) String() string {
|
||||
return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (stream *PdfObjectStream) DefaultWriteString() string {
|
||||
outStr := fmt.Sprintf("%d 0 R", (*stream).ObjectNumber)
|
||||
return outStr
|
||||
}
|
||||
|
||||
func (null *PdfObjectNull) String() string {
|
||||
return "null"
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (null *PdfObjectNull) DefaultWriteString() string {
|
||||
return "null"
|
||||
}
|
||||
|
||||
// Handy functions to work with primitive objects.
|
||||
|
||||
// TraceMaxDepth specifies the maximum recursion depth allowed.
|
||||
const TraceMaxDepth = 20
|
||||
|
||||
// TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained
|
||||
// in indirect objects (can be double referenced even).
|
||||
//
|
||||
// Note: This function does not trace/resolve references. That needs to be done beforehand.
|
||||
func TraceToDirectObject(obj PdfObject) PdfObject {
|
||||
iobj, isIndirectObj := obj.(*PdfIndirectObject)
|
||||
depth := 0
|
||||
for isIndirectObj {
|
||||
obj = iobj.PdfObject
|
||||
iobj, isIndirectObj = obj.(*PdfIndirectObject)
|
||||
depth++
|
||||
if depth > TraceMaxDepth {
|
||||
common.Log.Error("error: Trace depth level beyond %d - not going deeper!", TraceMaxDepth)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return obj
|
||||
}
|
||||
281
internal/pdf/core/repairs.go
Normal file
281
internal/pdf/core/repairs.go
Normal file
@@ -0,0 +1,281 @@
|
||||
// Routines related to repairing malformed pdf files.
|
||||
|
||||
package core
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
|
||||
"bufio"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
var repairReXrefTable = regexp.MustCompile(`[\r\n]\s*(xref)\s*[\r\n]`)
|
||||
|
||||
// Locates a standard Xref table by looking for the "xref" entry.
|
||||
// Xref object stream not supported.
|
||||
func (parser *PdfParser) repairLocateXref() (int64, error) {
|
||||
readBuf := int64(1000)
|
||||
parser.rs.Seek(-readBuf, os.SEEK_CUR)
|
||||
|
||||
curOffset, err := parser.rs.Seek(0, os.SEEK_CUR)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b2 := make([]byte, readBuf)
|
||||
parser.rs.Read(b2)
|
||||
|
||||
results := repairReXrefTable.FindAllStringIndex(string(b2), -1)
|
||||
if len(results) < 1 {
|
||||
common.Log.Debug("error: Repair: xref not found!")
|
||||
return 0, errors.New("repair: xref not found")
|
||||
}
|
||||
|
||||
localOffset := int64(results[len(results)-1][0])
|
||||
xrefOffset := curOffset + localOffset
|
||||
return xrefOffset, nil
|
||||
}
|
||||
|
||||
// Renumbers the xref table.
|
||||
// Useful when the cross reference is pointing to an object with the wrong number.
|
||||
// Update the table.
|
||||
func (parser *PdfParser) rebuildXrefTable() error {
|
||||
newXrefs := XrefTable{}
|
||||
for objNum, xref := range parser.xrefs {
|
||||
obj, _, err := parser.lookupByNumberWrapper(objNum, false)
|
||||
if err != nil {
|
||||
common.Log.Debug("error: Unable to look up object (%s)", err)
|
||||
common.Log.Debug("error: Xref table completely broken - attempting to repair ")
|
||||
xrefTable, err := parser.repairRebuildXrefsTopDown()
|
||||
if err != nil {
|
||||
common.Log.Debug("error: Failed xref rebuild repair (%s)", err)
|
||||
return err
|
||||
}
|
||||
parser.xrefs = *xrefTable
|
||||
common.Log.Debug("Repaired xref table built")
|
||||
return nil
|
||||
}
|
||||
actObjNum, actGenNum, err := getObjectNumber(obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
xref.objectNumber = int(actObjNum)
|
||||
xref.generation = int(actGenNum)
|
||||
newXrefs[int(actObjNum)] = xref
|
||||
}
|
||||
|
||||
parser.xrefs = newXrefs
|
||||
common.Log.Debug("New xref table built")
|
||||
printXrefTable(parser.xrefs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parses and returns the object and generation number from a string such as "12 0 obj" -> (12,0,nil).
|
||||
func parseObjectNumberFromString(str string) (int, int, error) {
|
||||
result := reIndirectObject.FindStringSubmatch(str)
|
||||
if len(result) < 3 {
|
||||
return 0, 0, errors.New("unable to detect indirect object signature")
|
||||
}
|
||||
|
||||
on, _ := strconv.Atoi(result[1])
|
||||
gn, _ := strconv.Atoi(result[2])
|
||||
|
||||
return on, gn, nil
|
||||
}
|
||||
|
||||
// Parse the entire file from top down.
|
||||
// Goes through the file byte-by-byte looking for "<num> <generation> obj" patterns.
|
||||
// N.B. This collects the XREF_TABLE_ENTRY data only.
|
||||
func (parser *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
|
||||
if parser.repairsAttempted {
|
||||
// Avoid multiple repairs (only try once).
|
||||
return nil, fmt.Errorf("repair failed")
|
||||
}
|
||||
parser.repairsAttempted = true
|
||||
|
||||
// Go to beginning, reset reader.
|
||||
parser.rs.Seek(0, os.SEEK_SET)
|
||||
parser.reader = bufio.NewReader(parser.rs)
|
||||
|
||||
// Keep a running buffer of last bytes.
|
||||
bufLen := 20
|
||||
last := make([]byte, bufLen)
|
||||
|
||||
xrefTable := XrefTable{}
|
||||
for {
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Format:
|
||||
// object number - whitespace - generation number - obj
|
||||
// e.g. "12 0 obj"
|
||||
if b == 'j' && last[bufLen-1] == 'b' && last[bufLen-2] == 'o' && IsWhiteSpace(last[bufLen-3]) {
|
||||
i := bufLen - 4
|
||||
// Go past whitespace
|
||||
for IsWhiteSpace(last[i]) && i > 0 {
|
||||
i--
|
||||
}
|
||||
if i == 0 || !IsDecimalDigit(last[i]) {
|
||||
continue
|
||||
}
|
||||
// Go past generation number
|
||||
for IsDecimalDigit(last[i]) && i > 0 {
|
||||
i--
|
||||
}
|
||||
if i == 0 || !IsWhiteSpace(last[i]) {
|
||||
continue
|
||||
}
|
||||
// Go past whitespace
|
||||
for IsWhiteSpace(last[i]) && i > 0 {
|
||||
i--
|
||||
}
|
||||
if i == 0 || !IsDecimalDigit(last[i]) {
|
||||
continue
|
||||
}
|
||||
// Go past object number.
|
||||
for IsDecimalDigit(last[i]) && i > 0 {
|
||||
i--
|
||||
}
|
||||
if i == 0 {
|
||||
continue // Probably too long to be a valid object...
|
||||
}
|
||||
|
||||
objOffset := parser.GetFileOffset() - int64(bufLen-i)
|
||||
|
||||
objstr := append(last[i+1:], b)
|
||||
objNum, genNum, err := parseObjectNumberFromString(string(objstr))
|
||||
if err != nil {
|
||||
common.Log.Debug("Unable to parse object number: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create and insert the XREF entry if not existing, or the generation number is higher.
|
||||
if curXref, has := xrefTable[objNum]; !has || curXref.generation < genNum {
|
||||
// Make the entry for the cross ref table.
|
||||
xrefEntry := XrefObject{}
|
||||
xrefEntry.xtype = XREF_TABLE_ENTRY
|
||||
xrefEntry.objectNumber = int(objNum)
|
||||
xrefEntry.generation = int(genNum)
|
||||
xrefEntry.offset = objOffset
|
||||
xrefTable[objNum] = xrefEntry
|
||||
}
|
||||
}
|
||||
|
||||
last = append(last[1:bufLen], b)
|
||||
}
|
||||
|
||||
return &xrefTable, nil
|
||||
}
|
||||
|
||||
// Look for first sign of xref table from end of file.
|
||||
func (parser *PdfParser) repairSeekXrefMarker() error {
|
||||
// Get the file size.
|
||||
fSize, err := parser.rs.Seek(0, os.SEEK_END)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
reXrefTableStart := regexp.MustCompile(`\sxref\s*`)
|
||||
|
||||
// Define the starting point (from the end of the file) to search from.
|
||||
var offset int64 = 0
|
||||
|
||||
// Define an buffer length in terms of how many bytes to read from the end of the file.
|
||||
var buflen int64 = 1000
|
||||
|
||||
for offset < fSize {
|
||||
if fSize <= (buflen + offset) {
|
||||
buflen = fSize - offset
|
||||
}
|
||||
|
||||
// Move back enough (as we need to read forward).
|
||||
_, err := parser.rs.Seek(-offset-buflen, os.SEEK_END)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Read the data.
|
||||
b1 := make([]byte, buflen)
|
||||
parser.rs.Read(b1)
|
||||
|
||||
common.Log.Trace("Looking for xref : \"%s\"", string(b1))
|
||||
ind := reXrefTableStart.FindAllStringIndex(string(b1), -1)
|
||||
if ind != nil {
|
||||
// Found it.
|
||||
lastInd := ind[len(ind)-1]
|
||||
common.Log.Trace("Ind: % d", ind)
|
||||
parser.rs.Seek(-offset-buflen+int64(lastInd[0]), os.SEEK_END)
|
||||
parser.reader = bufio.NewReader(parser.rs)
|
||||
// Go past whitespace, finish at 'x'.
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
common.Log.Trace("B: %d %c", bb[0], bb[0])
|
||||
if !IsWhiteSpace(bb[0]) {
|
||||
break
|
||||
}
|
||||
parser.reader.Discard(1)
|
||||
}
|
||||
|
||||
return nil
|
||||
} else {
|
||||
common.Log.Debug("warning: EOF marker not found! - continue seeking")
|
||||
}
|
||||
|
||||
offset += buflen
|
||||
}
|
||||
|
||||
common.Log.Debug("error: Xref table marker was not found.")
|
||||
return errors.New("xref not found ")
|
||||
}
|
||||
|
||||
// Called when Pdf version not found normally. Looks for the PDF version by scanning top-down.
|
||||
// %PDF-1.7
|
||||
func (parser *PdfParser) seekPdfVersionTopDown() (int, int, error) {
|
||||
// Go to beginning, reset reader.
|
||||
parser.rs.Seek(0, os.SEEK_SET)
|
||||
parser.reader = bufio.NewReader(parser.rs)
|
||||
|
||||
// Keep a running buffer of last bytes.
|
||||
bufLen := 20
|
||||
last := make([]byte, bufLen)
|
||||
|
||||
for {
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else {
|
||||
return 0, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// Format:
|
||||
// object number - whitespace - generation number - obj
|
||||
// e.g. "12 0 obj"
|
||||
if IsDecimalDigit(b) && last[bufLen-1] == '.' && IsDecimalDigit(last[bufLen-2]) && last[bufLen-3] == '-' &&
|
||||
last[bufLen-4] == 'F' && last[bufLen-5] == 'D' && last[bufLen-6] == 'P' {
|
||||
major := int(last[bufLen-2] - '0')
|
||||
minor := int(b - '0')
|
||||
return major, minor, nil
|
||||
}
|
||||
|
||||
last = append(last[1:bufLen], b)
|
||||
}
|
||||
|
||||
return 0, 0, errors.New("version not found")
|
||||
}
|
||||
129
internal/pdf/core/stream.go
Normal file
129
internal/pdf/core/stream.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// NewEncoderFromStream creates a StreamEncoder based on the stream's dictionary.
|
||||
func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
|
||||
filterObj := TraceToDirectObject(streamObj.PdfObjectDictionary.Get("Filter"))
|
||||
if filterObj == nil {
|
||||
// No filter, return raw data back.
|
||||
return NewRawEncoder(), nil
|
||||
}
|
||||
|
||||
if _, isNull := filterObj.(*PdfObjectNull); isNull {
|
||||
// Filter is null -> raw data.
|
||||
return NewRawEncoder(), nil
|
||||
}
|
||||
|
||||
// The filter should be a name or an array with a list of filter names.
|
||||
method, ok := filterObj.(*PdfObjectName)
|
||||
if !ok {
|
||||
array, ok := filterObj.(*PdfObjectArray)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("filter not a Name or Array object")
|
||||
}
|
||||
if len(*array) == 0 {
|
||||
// Empty array -> indicates raw filter (no filter).
|
||||
return NewRawEncoder(), nil
|
||||
}
|
||||
|
||||
if len(*array) != 1 {
|
||||
menc, err := newMultiEncoderFromStream(streamObj)
|
||||
if err != nil {
|
||||
common.Log.Error("Failed creating multi encoder: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
common.Log.Trace("Multi enc: %s\n", menc)
|
||||
return menc, nil
|
||||
}
|
||||
|
||||
// Single element.
|
||||
filterObj = (*array)[0]
|
||||
method, ok = filterObj.(*PdfObjectName)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("filter array member not a Name object")
|
||||
}
|
||||
}
|
||||
|
||||
switch *method {
|
||||
case StreamEncodingFilterNameFlate:
|
||||
return newFlateEncoderFromStream(streamObj, nil)
|
||||
case StreamEncodingFilterNameLZW:
|
||||
return newLZWEncoderFromStream(streamObj, nil)
|
||||
case StreamEncodingFilterNameDCT:
|
||||
return newDCTEncoderFromStream(streamObj, nil)
|
||||
case StreamEncodingFilterNameRunLength:
|
||||
return newRunLengthEncoderFromStream()
|
||||
case StreamEncodingFilterNameASCIIHex:
|
||||
return NewASCIIHexEncoder(), nil
|
||||
case StreamEncodingFilterNameASCII85, "A85":
|
||||
return NewASCII85Encoder(), nil
|
||||
case StreamEncodingFilterNameCCITTFax:
|
||||
return NewCCITTFaxEncoder(), nil
|
||||
case StreamEncodingFilterNameJBIG2:
|
||||
return NewJBIG2Encoder(), nil
|
||||
case StreamEncodingFilterNameJPX:
|
||||
return NewJPXEncoder(), nil
|
||||
default:
|
||||
common.Log.Debug("error: Unsupported encoding method!")
|
||||
return nil, fmt.Errorf("unsupported encoding method (%s)", *method)
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeStream decodes the stream data and returns the decoded data.
|
||||
// An error is returned upon failure.
|
||||
func DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
|
||||
common.Log.Trace("Decode stream")
|
||||
|
||||
encoder, err := NewEncoderFromStream(streamObj)
|
||||
if err != nil {
|
||||
common.Log.Debug("Stream decoding failed: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
common.Log.Trace("Encoder: %#v\n", encoder)
|
||||
|
||||
decoded, err := encoder.DecodeStream(streamObj)
|
||||
if err != nil {
|
||||
common.Log.Debug("Stream decoding failed: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return decoded, nil
|
||||
}
|
||||
|
||||
// EncodeStream encodes the stream data using the encoded specified by the stream's dictionary.
|
||||
func EncodeStream(streamObj *PdfObjectStream) error {
|
||||
common.Log.Trace("Encode stream")
|
||||
|
||||
encoder, err := NewEncoderFromStream(streamObj)
|
||||
if err != nil {
|
||||
common.Log.Debug("Stream decoding failed: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if lzwenc, is := encoder.(*LZWEncoder); is {
|
||||
// If LZW:
|
||||
// Make sure to use EarlyChange 0.. We do not have write support for 1 yet.
|
||||
lzwenc.EarlyChange = 0
|
||||
streamObj.PdfObjectDictionary.Set("EarlyChange", MakeInteger(0))
|
||||
}
|
||||
|
||||
common.Log.Trace("Encoder: %+v\n", encoder)
|
||||
encoded, err := encoder.EncodeBytes(streamObj.Stream)
|
||||
if err != nil {
|
||||
common.Log.Debug("Stream encoding failed: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
streamObj.Stream = encoded
|
||||
|
||||
// Update length
|
||||
streamObj.PdfObjectDictionary.Set("Length", MakeInteger(int64(len(encoded))))
|
||||
|
||||
return nil
|
||||
}
|
||||
74
internal/pdf/core/symbols.go
Normal file
74
internal/pdf/core/symbols.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package core
|
||||
|
||||
// IsWhiteSpace checks if byte represents a white space character.
// The white-space characters are those of Table 1 (7.2.2 Character Set):
// 0x00, 0x09, 0x0A, 0x0C, 0x0D and 0x20.
// TODO (v3): Unexport.
func IsWhiteSpace(ch byte) bool {
	switch ch {
	case 0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20:
		return true
	}
	return false
}
|
||||
|
||||
// IsFloatDigit checks if a character can be a part of a float number string,
// i.e. it is a decimal digit or the decimal point '.'.
// TODO (v3): Unexport.
func IsFloatDigit(c byte) bool {
	if c == '.' {
		return true
	}
	return c >= '0' && c <= '9'
}
|
||||
|
||||
// IsDecimalDigit checks if the character is a part of a decimal number string,
// i.e. it lies in the range '0' to '9'.
// TODO (v3): Unexport.
func IsDecimalDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
||||
|
||||
// IsOctalDigit checks if a character can be part of an octal digit string,
// i.e. it lies in the range '0' to '7'.
// TODO (v3): Unexport.
func IsOctalDigit(c byte) bool {
	return '0' <= c && c <= '7'
}
|
||||
|
||||
// IsPrintable checks if a character is printable, i.e. it lies in the range
// EXCLAMATION MARK (21h) (!) to TILDE (7Eh) (~).
// Regular characters outside that range should be written using the
// hexadecimal notation.
// TODO (v3): Unexport.
func IsPrintable(char byte) bool {
	return char >= 0x21 && char <= 0x7E
}
|
||||
|
||||
// IsDelimiter checks if a character represents a delimiter: one of
// ( ) < > [ ] { } / %.
// TODO (v3): Unexport.
func IsDelimiter(char byte) bool {
	switch char {
	case '(', ')', '<', '>', '[', ']', '{', '}', '/', '%':
		return true
	}
	return false
}
|
||||
172
internal/pdf/core/utils.go
Normal file
172
internal/pdf/core/utils.go
Normal file
@@ -0,0 +1,172 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/common"
|
||||
)
|
||||
|
||||
// checkBounds verifies that the range [a:b] can be used to slice a slice of
// length sliceLen, i.e. that 0 <= a <= b <= sliceLen.
//
// The checks run in a fixed order (a in range, then b >= a, then b in range)
// and the error of the first one that fails is returned.
func checkBounds(sliceLen, a, b int) error {
	switch {
	case a < 0 || a > sliceLen:
		return errors.New("slice index a out of bounds")
	case b < a:
		return errors.New("invalid slice index b < a")
	case b > sliceLen:
		return errors.New("slice index b out of bounds")
	}
	return nil
}
|
||||
|
||||
// Inspect analyzes the document object structure.
|
||||
func (parser *PdfParser) Inspect() (map[string]int, error) {
|
||||
return parser.inspect()
|
||||
}
|
||||
|
||||
// GetObjectNums returns a sorted list of object numbers of the PDF objects in the file.
|
||||
func (parser *PdfParser) GetObjectNums() []int {
|
||||
objNums := []int{}
|
||||
for _, x := range parser.xrefs {
|
||||
objNums = append(objNums, x.objectNumber)
|
||||
}
|
||||
|
||||
// Sort the object numbers to give consistent ordering of PDF objects in output.
|
||||
// Needed since parser.xrefs is a map.
|
||||
sort.Ints(objNums)
|
||||
|
||||
return objNums
|
||||
}
|
||||
|
||||
/*
|
||||
* Inspect object types.
|
||||
* Go through all objects in the cross ref table and detect the types.
|
||||
* Mostly for debugging purposes and inspecting odd PDF files.
|
||||
*/
|
||||
func (parser *PdfParser) inspect() (map[string]int, error) {
|
||||
common.Log.Trace("--------INSPECT ----------")
|
||||
common.Log.Trace("Xref table:")
|
||||
|
||||
objTypes := map[string]int{}
|
||||
objCount := 0
|
||||
failedCount := 0
|
||||
|
||||
keys := []int{}
|
||||
for k := range parser.xrefs {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Ints(keys)
|
||||
|
||||
i := 0
|
||||
for _, k := range keys {
|
||||
xref := parser.xrefs[k]
|
||||
if xref.objectNumber == 0 {
|
||||
continue
|
||||
}
|
||||
objCount++
|
||||
common.Log.Trace("==========")
|
||||
common.Log.Trace("Looking up object number: %d", xref.objectNumber)
|
||||
o, err := parser.LookupByNumber(xref.objectNumber)
|
||||
if err != nil {
|
||||
common.Log.Trace("error: Fail to lookup obj %d (%s)", xref.objectNumber, err)
|
||||
failedCount++
|
||||
continue
|
||||
}
|
||||
|
||||
common.Log.Trace("obj: %s", o)
|
||||
|
||||
iobj, isIndirect := o.(*PdfIndirectObject)
|
||||
if isIndirect {
|
||||
common.Log.Trace("IND OOBJ %d: %s", xref.objectNumber, iobj)
|
||||
dict, isDict := iobj.PdfObject.(*PdfObjectDictionary)
|
||||
if isDict {
|
||||
// Check if has Type parameter.
|
||||
if ot, has := dict.Get("Type").(*PdfObjectName); has {
|
||||
otype := string(*ot)
|
||||
common.Log.Trace("---> Obj type: %s", otype)
|
||||
_, isDefined := objTypes[otype]
|
||||
if isDefined {
|
||||
objTypes[otype]++
|
||||
} else {
|
||||
objTypes[otype] = 1
|
||||
}
|
||||
} else if ot, has := dict.Get("Subtype").(*PdfObjectName); has {
|
||||
// Check if subtype
|
||||
otype := string(*ot)
|
||||
common.Log.Trace("---> Obj subtype: %s", otype)
|
||||
_, isDefined := objTypes[otype]
|
||||
if isDefined {
|
||||
objTypes[otype]++
|
||||
} else {
|
||||
objTypes[otype] = 1
|
||||
}
|
||||
}
|
||||
if val, has := dict.Get("S").(*PdfObjectName); has && *val == "JavaScript" {
|
||||
// Check if Javascript.
|
||||
_, isDefined := objTypes["JavaScript"]
|
||||
if isDefined {
|
||||
objTypes["JavaScript"]++
|
||||
} else {
|
||||
objTypes["JavaScript"] = 1
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else if sobj, isStream := o.(*PdfObjectStream); isStream {
|
||||
if otype, ok := sobj.PdfObjectDictionary.Get("Type").(*PdfObjectName); ok {
|
||||
common.Log.Trace("--> Stream object type: %s", *otype)
|
||||
k := string(*otype)
|
||||
objTypes[k]++
|
||||
}
|
||||
} else { // Direct.
|
||||
dict, isDict := o.(*PdfObjectDictionary)
|
||||
if isDict {
|
||||
ot, isName := dict.Get("Type").(*PdfObjectName)
|
||||
if isName {
|
||||
otype := string(*ot)
|
||||
common.Log.Trace("--- obj type %s", otype)
|
||||
objTypes[otype]++
|
||||
}
|
||||
}
|
||||
common.Log.Trace("DIRECT OBJ %d: %s", xref.objectNumber, o)
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
common.Log.Trace("--------EOF INSPECT ----------")
|
||||
common.Log.Trace("=======")
|
||||
common.Log.Trace("Object count: %d", objCount)
|
||||
common.Log.Trace("Failed lookup: %d", failedCount)
|
||||
for t, c := range objTypes {
|
||||
common.Log.Trace("%s: %d", t, c)
|
||||
}
|
||||
common.Log.Trace("=======")
|
||||
|
||||
if len(parser.xrefs) < 1 {
|
||||
common.Log.Debug("error: This document is invalid (xref table missing!)")
|
||||
return nil, fmt.Errorf("invalid document (xref table missing)")
|
||||
}
|
||||
|
||||
fontObjs, ok := objTypes["Font"]
|
||||
if !ok || fontObjs < 2 {
|
||||
common.Log.Trace("This document is probably scanned!")
|
||||
} else {
|
||||
common.Log.Trace("This document is valid for extraction!")
|
||||
}
|
||||
|
||||
return objTypes, nil
|
||||
}
|
||||
|
||||
// absInt returns the absolute value of x.
// For the most negative int the negation wraps around, so that value is
// returned unchanged (still negative).
func absInt(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
|
||||
Reference in New Issue
Block a user