pdf-diff icon indicating copy to clipboard operation
pdf-diff copied to clipboard

replace poppler with inline code

Open gedw99 opened this issue 3 years ago • 0 comments

We can replace the poppler call at https://github.com/serhack/pdf-diff/blob/5535f71841530a024fd0b8b1f823de03df9a0fc1/main.go#L48 with the following code if we want:

package main

import (
	"flag"
	"fmt"
	"image/jpeg"
	"os"
	"path/filepath"

	"github.com/gen2brain/go-fitz"
)

// main renders every page of a PDF to JPEG, plain-text and HTML files.
//
// Flags:
//
//	-source  path of the PDF file to read (default ".")
//	-target  directory that receives the output files (default "."); it is
//	         created, including parents, if it does not exist
//
// For each zero-based page index n the files testNNN.jpg, testNNN.txt and
// testNNN.html are written to the target directory. All images are written
// first, then all text files, then all HTML files. Any error aborts the
// program with a panic.
func main() {
	// Flags for the source file (PDF) and the output directory (jpg, txt, html).
	sourceFile := flag.String("source", ".", "source file pdf")
	targetDir := flag.String("target", ".", "target dir")

	flag.Parse()

	fmt.Println("sourceFile:", *sourceFile)
	fmt.Println("targetDir:", *targetDir)

	doc, err := fitz.New(*sourceFile)
	if err != nil {
		panic(err)
	}
	defer doc.Close()

	if err := os.MkdirAll(*targetDir, os.ModePerm); err != nil {
		panic(err)
	}

	// outputPath builds "<targetDir>/testNNN.<ext>" for page index n.
	outputPath := func(n int, ext string) string {
		return filepath.Join(*targetDir, fmt.Sprintf("test%03d.%s", n, ext))
	}

	// The page count does not change while the document is open, so read it once.
	numPages := doc.NumPage()

	// Extract pages as images.
	for n := 0; n < numPages; n++ {
		img, err := doc.Image(n)
		if err != nil {
			panic(err)
		}

		err = writeFile(outputPath(n, "jpg"), func(f *os.File) error {
			return jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		})
		if err != nil {
			panic(err)
		}
	}

	// Extract pages as text.
	for n := 0; n < numPages; n++ {
		text, err := doc.Text(n)
		if err != nil {
			panic(err)
		}

		err = writeFile(outputPath(n, "txt"), func(f *os.File) error {
			_, werr := f.WriteString(text)
			return werr
		})
		if err != nil {
			panic(err)
		}
	}

	// Extract pages as HTML.
	for n := 0; n < numPages; n++ {
		html, err := doc.HTML(n, true)
		if err != nil {
			panic(err)
		}

		err = writeFile(outputPath(n, "html"), func(f *os.File) error {
			_, werr := f.WriteString(html)
			return werr
		})
		if err != nil {
			panic(err)
		}
	}
}

// writeFile creates (or truncates) the file at path, hands the open file to
// write, and then closes it. It returns the first error from creating,
// writing or closing. The close error is reported because a failed close can
// mean the data never reached the disk.
func writeFile(path string, write func(f *os.File) error) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Always close, but do not let a close error mask a write error.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	return write(f)
}

This will build on all operating systems because the prebuilt libraries for each OS are included at https://github.com/gen2brain/go-fitz/tree/master/libs

It works for me on Mac. It should probably also be tested on Windows and Linux.

It would replace poppler, which is very heavy IMHO, and make the Go binary fully self-contained in a single file.

gedw99 avatar Aug 26 '22 15:08 gedw99