项目介绍:
Golang PDF library for creating and processing PDF files (pure Go)
项目地址:
https://github.com/unidoc/unipdf
GOPATH 方式安装:
go get github.com/unidoc/unipdf/...
中文支持:
支持特性:
Create PDF reports. Example output: unidoc-report.pdf.
Table PDF reports. Example output: unipdf-tables.pdf.
Invoice creation
Paragraph in creator handling multiple styles within the same paragraph
Merge PDF pages
Split PDF pages and change page order
Rotate pages
Extract text from PDF files
Text extraction support with size, position and formatting info
Extract images with coordinates
Images to PDF
Add images to pages
Compress and optimize PDF
Watermark PDF files
Advanced page manipulation (blocks/templates)
Load PDF templates and modify
Form creation
Fill and flatten forms
Fill out forms and FDF merging
Unlock PDF files / remove password
Protect PDF files with a password
Digital signing and signature validation
CCITTFaxDecode decoding and encoding support
JBIG2 decoding support
官方Demo的改进
官方 demo 默认的 import 路径不正确,以下是修复后的版本:
package main
import (
"fmt"
"os"
"github.com/unidoc/unipdf/extractor"
pdf "github.com/unidoc/unipdf/model"
)
// main is the command-line entry point. It expects the path of a PDF file as
// the first argument and hands it to outputPdfText, which prints the extracted
// text to stdout.
//
// Usage and error messages are written to stderr so that they never end up
// mixed into the extracted text when stdout is redirected or piped. The
// process exits with status 1 when the argument is missing or extraction
// fails.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage: go run main.go input.pdf")
		os.Exit(1)
	}

	// Make sure to enter a valid license key.
	// Otherwise text is truncated and a watermark added to the text.
	// License keys are available via: https://unidoc.io
	// NOTE: the license package is not in this file's import list; it has to
	// be imported before the snippet below can be uncommented.
	/*
		license.SetLicenseKey(`
	-----BEGIN UNIDOC LICENSE KEY-----
	...key contents...
	-----END UNIDOC LICENSE KEY-----
	`)
	*/

	// For debugging (likewise requires importing the common package).
	//common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))

	if err := outputPdfText(os.Args[1]); err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
// outputPdfText prints the text content of the PDF file at inputPath to the
// terminal (stdout).
//
// It prints the total page count and a short banner, then for each page, in
// order, a separator line, the 1-based page number, and the extracted text
// wrapped in double quotes.
//
// Processing stops at the first failure. The returned error is wrapped with
// the step that failed (and the page number, where applicable); the underlying
// error remains reachable through errors.Is / errors.As.
func outputPdfText(inputPath string) error {
	f, err := os.Open(inputPath)
	if err != nil {
		// The error from os.Open already names the operation and the path.
		return err
	}
	defer f.Close()

	pdfReader, err := pdf.NewPdfReader(f)
	if err != nil {
		return fmt.Errorf("reading PDF %q: %w", inputPath, err)
	}

	numPages, err := pdfReader.GetNumPages()
	if err != nil {
		return fmt.Errorf("counting pages of %q: %w", inputPath, err)
	}

	fmt.Printf("Total Pages:%d\n", numPages)
	fmt.Println("--------------------")
	fmt.Println("PDF to text extraction:")
	fmt.Println("--------------------")

	// Pages are requested by 1-based page number.
	for pageNum := 1; pageNum <= numPages; pageNum++ {
		page, err := pdfReader.GetPage(pageNum)
		if err != nil {
			return fmt.Errorf("loading page %d: %w", pageNum, err)
		}

		ex, err := extractor.New(page)
		if err != nil {
			return fmt.Errorf("creating extractor for page %d: %w", pageNum, err)
		}

		text, err := ex.ExtractText()
		if err != nil {
			return fmt.Errorf("extracting text from page %d: %w", pageNum, err)
		}

		fmt.Println("------------------------------")
		fmt.Printf("Page %d:\n", pageNum)
		fmt.Printf("\"%s\"\n", text)
	}
	return nil
}