pdf-diff
pdf-diff copied to clipboard
replace poppler with inline code
If we want, we can replace the poppler call at https://github.com/serhack/pdf-diff/blob/5535f71841530a024fd0b8b1f823de03df9a0fc1/main.go#L48 with the following:
package main
import (
"flag"
"fmt"
"image/jpeg"
"os"
"path/filepath"
"github.com/gen2brain/go-fitz"
)
// main parses the command-line flags, echoes them, and extracts every page of
// the source PDF into the target directory. On failure it prints the error to
// stderr and exits with status 1.
func main() {
	// Flags for the source PDF file and the output directory.
	sourceFile := flag.String("source", ".", "source file pdf")
	targetDir := flag.String("target", ".", "target dir")
	flag.Parse()
	fmt.Println("sourceFile:", *sourceFile)
	fmt.Println("targetDir:", *targetDir)

	if err := extractPages(*sourceFile, *targetDir); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// extractPages opens the document at sourceFile and, for every page n, writes
// three files into targetDir: testNNN.jpg (rendered image), testNNN.txt
// (extracted text) and testNNN.html (HTML rendering). targetDir is created if
// it does not exist. The first error encountered is returned, wrapped with the
// page number or path it relates to.
func extractPages(sourceFile, targetDir string) error {
	doc, err := fitz.New(sourceFile)
	if err != nil {
		return fmt.Errorf("opening %q: %w", sourceFile, err)
	}
	defer doc.Close()

	if err := os.MkdirAll(targetDir, os.ModePerm); err != nil {
		return fmt.Errorf("creating target dir %q: %w", targetDir, err)
	}

	// The page count is loop-invariant, so query it once.
	numPages := doc.NumPage()
	for n := 0; n < numPages; n++ {
		// Extract the page as an image.
		img, err := doc.Image(n)
		if err != nil {
			return fmt.Errorf("rendering page %d as image: %w", n, err)
		}
		jpgPath := filepath.Join(targetDir, fmt.Sprintf("test%03d.jpg", n))
		err = writeFile(jpgPath, func(f *os.File) error {
			// Keyed field keeps the literal valid if jpeg.Options ever grows.
			return jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		})
		if err != nil {
			return fmt.Errorf("writing %q: %w", jpgPath, err)
		}

		// Extract the page as text.
		text, err := doc.Text(n)
		if err != nil {
			return fmt.Errorf("extracting text of page %d: %w", n, err)
		}
		txtPath := filepath.Join(targetDir, fmt.Sprintf("test%03d.txt", n))
		err = writeFile(txtPath, func(f *os.File) error {
			_, werr := f.WriteString(text)
			return werr
		})
		if err != nil {
			return fmt.Errorf("writing %q: %w", txtPath, err)
		}

		// Extract the page as HTML.
		// NOTE(review): the second argument presumably asks go-fitz to emit a
		// full HTML header — confirm against the go-fitz documentation.
		html, err := doc.HTML(n, true)
		if err != nil {
			return fmt.Errorf("extracting HTML of page %d: %w", n, err)
		}
		htmlPath := filepath.Join(targetDir, fmt.Sprintf("test%03d.html", n))
		err = writeFile(htmlPath, func(f *os.File) error {
			_, werr := f.WriteString(html)
			return werr
		})
		if err != nil {
			return fmt.Errorf("writing %q: %w", htmlPath, err)
		}
	}
	return nil
}

// writeFile creates (or truncates) the file at path, hands it to write, and
// closes it. The file is closed even when write fails, and a Close error is
// reported when write itself succeeded, so a failed flush is never silently
// dropped.
func writeFile(path string, write func(f *os.File) error) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	return write(f)
}
This will build on every supported operating system, because go-fitz bundles prebuilt libraries for each of them at https://github.com/gen2brain/go-fitz/tree/master/libs
It works for me on macOS; it should still be tested on Windows and Linux.
It would replace poppler, which is very heavy IMHO, and make the Go binary fully self-contained in a single file.