gofpdi icon indicating copy to clipboard operation
gofpdi copied to clipboard

A weird problem: the default PDF text language changed after importing a PDF page

Open kaiceo opened this issue 3 years ago • 3 comments

import (
          "github.com/phpdave11/gofpdf"
          "github.com/phpdave11/gofpdi"
      )

// FontType is the file name of the TrueType font that NewPdf registers. The
// same string is used as the font family name and as the file name under
// static/font/.
var FontType = "NotoSansSC-Regular.ttf"

// NewPdf creates a document with gofpdf.New("P", "pt", "A4", "") and registers
// the UTF-8 font named by FontType from the static/font/ directory, using
// FontType as the family name and an empty style string.
func NewPdf() *gofpdf.Fpdf {
	fontPath := "static/font/" + FontType

	doc := gofpdf.New("P", "pt", "A4", "")
	// Register the font used for Chinese text.
	doc.AddUTF8Font(FontType, "", fontPath)
	return doc
}


// imp is the package-level importer used by every ImportPdfPages call.
var imp = gofpdi.NewImporter()

// ImportPdfPages appends every page of the PDF file at pdffile to pdf and
// returns pdf.
//
// Each source page is imported as a template using its /MediaBox. A page whose
// rotation amounts to an odd number of quarter turns (e.g. 90 or 270 degrees)
// is placed on a landscape page with the template's width and height swapped;
// every other page is placed on a page added with pdf.AddPage().
//
// If the rotation of a page cannot be read, the error is recorded on pdf via
// SetError and importing stops at that page, so the failed result is never
// dereferenced.
func ImportPdfPages(pdf *gofpdf.Fpdf, pdffile string) *gofpdf.Fpdf {
	imp.SetSourceFile(pdffile)
	pageSizes := imp.GetPageSizes()
	total := len(pageSizes)

	pdfReader := imp.GetReader()

	// Page numbers are 1-based, matching the keys of pageSizes.
	for i := 1; i <= total; i++ {
		rotation, err := pdfReader.GetPageRotation(i)
		if err != nil {
			// Surface the failure through the document instead of reading
			// rotation.Int from a result that was never populated.
			pdf.SetError(err)
			return pdf
		}

		mediaBox := pageSizes[i]["/MediaBox"]
		curWidth, curHeight := mediaBox["w"], mediaBox["h"]

		tpl := getTemplateID(imp, pdf, i, "/MediaBox")

		// An odd number of quarter turns swaps the page's width and height.
		// A zero angle yields an even count, so no separate zero check is
		// needed.
		if angle := rotation.Int % 360; (angle/90)%2 != 0 {
			pdf.AddPageFormat(gofpdf.OrientationLandscape, gofpdf.SizeType{Wd: curWidth, Ht: curHeight})
			pdf.UseImportedTemplate(imp.UseTemplate(tpl, 0, 0, curHeight, curWidth))
			continue
		}

		pdf.AddPage()
		pdf.UseImportedTemplate(imp.UseTemplate(tpl, 0, 0, curWidth, curHeight))
	}
	return pdf
}

// getTemplateID imports page pageno of importer i using the given box, then
// passes the importer's form XObjects, imported objects and object hash
// positions to f. It returns the value produced by ImportPage.
//
// Reference: github.com/phpdave11/gofpdi
func getTemplateID(i *gofpdi.Importer, f *gofpdf.Fpdf, pageno int, box string) int {
	tplID := i.ImportPage(pageno, box)

	// Hand what the importer produced over to the target document, in the
	// same order as the individual importer calls are made.
	f.ImportTemplates(i.PutFormXobjectsUnordered())
	f.ImportObjects(i.GetImportedObjectsUnordered())
	f.ImportObjPos(i.GetImportedObjHashPos())

	return tplID
}

// Tested on Windows 7 and CentOS 7.5 with Go 1.16. The PDF text is in Chinese before importing (sample PDF, page 2): image

The text changed to English after importing:

QQ截图20210527100919

I don't know the reason.

The sample PDF is here ↓↓↓↓↓↓↓↓↓

BODYL-P1.pdf

kaiceo avatar May 27 '21 02:05 kaiceo

It seems the reason is that CID fonts are not supported. Furthermore, I found that the /DecodeParms entry in the PDF source does not seem to be supported by reader.go: <</DecodeParms<</Columns 5/Predictor 12>

kaiceo avatar May 31 '21 02:05 kaiceo

Cleaning the PDF with MuPDF (`mutool clean`) before importing fixes the problem.

kaiceo avatar Jun 03 '21 01:06 kaiceo

pdfReader.GetPageRotation undefined

ericzhao007 avatar Sep 13 '22 09:09 ericzhao007