some improvements

This commit is contained in:
Adrian Zürcher
2026-01-01 11:00:23 +01:00
parent 8f313c00f0
commit ef0778c8b3
20 changed files with 305 additions and 385 deletions

View File

@@ -3,6 +3,7 @@ package contentstream
import (
"bytes"
"fmt"
"strings"
"gitea.tecamino.com/paadi/pdfmerge/internal/pdf/core"
)
@@ -111,7 +112,7 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
}
inText := false
xPos, yPos := float64(-1), float64(-1)
txt := ""
var txt strings.Builder
for _, op := range *operations {
switch op.Operand {
case "BT":
@@ -122,7 +123,7 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
if op.Operand == "Td" || op.Operand == "TD" || op.Operand == "T*" {
// Move to next line...
txt += "\n"
txt.WriteString("\n")
}
if op.Operand == "Tm" {
if len(op.Params) != 6 {
@@ -147,7 +148,7 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
if yPos == -1 {
yPos = float64(*yfloat)
} else if yPos > float64(*yfloat) {
txt += "\n"
txt.WriteString("\n")
xPos = float64(*xfloat)
yPos = float64(*yfloat)
continue
@@ -155,7 +156,7 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
if xPos == -1 {
xPos = float64(*xfloat)
} else if xPos < float64(*xfloat) {
txt += "\t"
txt.WriteString("\t")
xPos = float64(*xfloat)
}
}
@@ -170,14 +171,14 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
for _, obj := range *paramList {
switch v := obj.(type) {
case *core.PdfObjectString:
txt += string(*v)
txt.WriteString(string(*v))
case *core.PdfObjectFloat:
if *v < -100 {
txt += " "
txt.WriteString(" ")
}
case *core.PdfObjectInteger:
if *v < -100 {
txt += " "
txt.WriteString(" ")
}
}
}
@@ -189,9 +190,9 @@ func (s *ContentStreamParser) ExtractText() (string, error) {
if !ok {
return "", fmt.Errorf("invalid parameter type, not string (%T)", op.Params[0])
}
txt += string(*param)
txt.WriteString(string(*param))
}
}
return txt, nil
return txt.String(), nil
}