首页

关于使用pdfbox的PDFSplit和PDFMerger对pdf内容分割、合并处理代码示例

标签:PDFSplit,pdfbox,PDFMerger,内容合并,内容分隔,代码示例     发布时间:2018-04-14   

一、前言

下面基于pdfbox的PDF文档模型类org.apache.pdfbox.pdmodel.PDDocument,以及org.apache.pdfbox.multipdf.Splitter、org.apache.pdfbox.multipdf.PDFMergerUtility这两个工具类,分别通过org.apache.pdfbox.tools.PDFSplit、org.apache.pdfbox.tools.PDFMerger给出对pdf文件进行内容分割、合并的代码示例。

二、代码示例

1.PDFSplit分割代码示例

package org.apache.pdfbox.tools;@b@@b@import java.io.File;@b@import java.io.IOException;@b@import java.io.FileOutputStream;@b@import java.util.List;@b@@b@import org.apache.pdfbox.pdmodel.PDDocument;@b@import org.apache.pdfbox.pdfwriter.COSWriter;@b@import org.apache.pdfbox.multipdf.Splitter;@b@@b@/**@b@ * This is the main program that will take a pdf document and split it into@b@ * a number of other documents.@b@ *@b@ * @author Ben Litchfield@b@ */@b@public final class PDFSplit@b@{@b@    private static final String PASSWORD = "-password";@b@    private static final String SPLIT = "-split";@b@    private static final String START_PAGE = "-startPage";@b@    private static final String END_PAGE = "-endPage";@b@    private static final String OUTPUT_PREFIX = "-outputPrefix";@b@@b@    private PDFSplit()@b@    {@b@    }@b@    /**@b@     * Infamous main method.@b@     *@b@     * @param args Command line arguments, should be one and a reference to a file.@b@     *@b@     * @throws IOException If there is an error parsing the document.@b@     */@b@    public static void main( String[] args ) throws IOException@b@    {@b@        // suppress the Dock icon on OS X@b@        System.setProperty("apple.awt.UIElement", "true");@b@@b@        PDFSplit split = new PDFSplit();@b@        split.split( args );@b@    }@b@@b@    private void split( String[] args ) throws IOException@b@    {@b@        String password = "";@b@        String split = null;@b@        String startPage = null;@b@        String endPage = null;@b@        Splitter splitter = new Splitter();@b@        String pdfFile = null;@b@        String outputPrefix = null;@b@        for( int i=0; i<args.length; i++ )@b@        {@b@            if( args[i].equals( PASSWORD ) )@b@            {@b@                i++;@b@                if( i >= args.length )@b@                {@b@                    usage();@b@                }@b@                password = args[i];@b@            }@b@            else if( args[i].equals( SPLIT ) )@b@   
         {@b@                i++;@b@                if( i >= args.length )@b@                {@b@                    usage();@b@                }@b@                split = args[i];@b@            }@b@            else if( args[i].equals( START_PAGE ) )@b@            {@b@                i++;@b@                if( i >= args.length )@b@                {@b@                    usage();@b@                }@b@                startPage = args[i];@b@            }@b@            else if( args[i].equals( END_PAGE ) )@b@            {@b@                i++;@b@                if( i >= args.length )@b@                {@b@                    usage();@b@                }@b@                endPage = args[i];@b@            }@b@            else if( args[i].equals( OUTPUT_PREFIX ) )@b@            {@b@                i++;@b@                outputPrefix = args[i];@b@            }@b@            else@b@            {@b@                if( pdfFile == null )@b@                {@b@                    pdfFile = args[i];@b@                }@b@            }@b@        }@b@@b@        if( pdfFile == null )@b@        {@b@            usage();@b@        }@b@        else@b@        {          @b@            if (outputPrefix == null)@b@            {@b@                outputPrefix = pdfFile.substring(0, pdfFile.lastIndexOf('.'));@b@            }@b@            PDDocument document = null;@b@            List<PDDocument> documents = null;@b@            try@b@            {@b@                document = PDDocument.load(new File(pdfFile), password);@b@@b@                int numberOfPages = document.getNumberOfPages();@b@                boolean startEndPageSet = false;@b@                if (startPage != null)@b@                {@b@                    splitter.setStartPage(Integer.parseInt( startPage ));@b@                    startEndPageSet = true;@b@                    if (split == null)@b@                    {@b@                        splitter.setSplitAtPage(numberOfPages);@b@                    }@b@                
}@b@                if (endPage != null)@b@                {@b@                    splitter.setEndPage(Integer.parseInt( endPage ));@b@                    startEndPageSet = true;@b@                    if (split == null)@b@                    {@b@                        splitter.setSplitAtPage(Integer.parseInt( endPage ));@b@                    }@b@                }@b@                if (split != null)@b@                {@b@                    splitter.setSplitAtPage( Integer.parseInt( split ) );@b@                }@b@                else @b@                {@b@                    if (!startEndPageSet)@b@                    {@b@                        splitter.setSplitAtPage(1);@b@                    }@b@                }@b@                    @b@                documents = splitter.split( document );@b@                for( int i=0; i<documents.size(); i++ )@b@                {@b@                    PDDocument doc = documents.get( i );@b@                    String fileName = outputPrefix + "-" + (i + 1) + ".pdf";@b@                    writeDocument( doc, fileName );@b@                    doc.close();@b@                }@b@@b@            }@b@            finally@b@            {@b@                if( document != null )@b@                {@b@                    document.close();@b@                }@b@                for( int i=0; documents != null && i<documents.size(); i++ )@b@                {@b@                    PDDocument doc = documents.get(i);@b@                    doc.close();@b@                }@b@            }@b@        }@b@    }@b@@b@    private static void writeDocument( PDDocument doc, String fileName ) throws IOException@b@    {@b@        FileOutputStream output = null;@b@        COSWriter writer = null;@b@        try@b@        {@b@            output = new FileOutputStream( fileName );@b@            writer = new COSWriter( output );@b@            writer.write( doc );@b@        }@b@        finally@b@        {@b@            if( output != null )@b@           
 {@b@                output.close();@b@            }@b@            if( writer != null )@b@            {@b@                writer.close();@b@            }@b@        }@b@    }@b@@b@    /**@b@     * This will print the usage requirements and exit.@b@     */@b@    private static void usage()@b@    {@b@        String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFSplit [options] <inputfile>\n"@b@                + "\nOptions:\n"@b@                + "  -password  <password>  : Password to decrypt document\n"@b@                + "  -split     <integer>   : split after this many pages (default 1, if startPage and endPage are unset)\n"@b@                + "  -startPage <integer>   : start page\n"@b@                + "  -endPage   <integer>   : end page\n"@b@                + "  -outputPrefix <prefix> : Filename prefix for splitted files\n"@b@                + "  <inputfile>            : The PDF document to use\n";@b@        @b@        System.err.println(message);@b@        System.exit( 1 );@b@    }@b@}

2.PDFMerger合并代码示例

package org.apache.pdfbox.tools;@b@@b@import java.io.IOException;@b@import org.apache.pdfbox.io.MemoryUsageSetting;@b@import org.apache.pdfbox.multipdf.PDFMergerUtility;@b@@b@/**@b@ * This is the main program that will take a list of pdf documents and merge them,@b@ * saving the result in a new document.@b@ *@b@ * @author Ben Litchfield@b@ */@b@public final class PDFMerger@b@{@b@    @b@    private PDFMerger()@b@    {@b@    }@b@    /**@b@     * Infamous main method.@b@     *@b@     * @param args Command line arguments, should be at least 3.@b@     *@b@     * @throws IOException If there is an error parsing the document.@b@     */@b@    public static void main( String[] args ) throws IOException@b@    {@b@        // suppress the Dock icon on OS X@b@        System.setProperty("apple.awt.UIElement", "true");@b@@b@        PDFMerger merge = new PDFMerger();@b@        merge.merge( args );@b@    }@b@@b@    private void merge( String[] args ) throws IOException@b@    {@b@        int firstFileArgPos = 0;@b@@b@        if ( args.length - firstFileArgPos < 3 )@b@        {@b@            usage();@b@        }@b@@b@        PDFMergerUtility merger = new PDFMergerUtility();@b@        for( int i=firstFileArgPos; i<args.length-1; i++ )@b@        {@b@            String sourceFileName = args[i];@b@            merger.addSource(sourceFileName);@b@        }@b@@b@        String destinationFileName = args[args.length-1];@b@        merger.setDestinationFileName(destinationFileName);@b@        merger.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());@b@    }@b@@b@    /**@b@     * This will print the usage requirements and exit.@b@     */@b@    private static void usage()@b@    {@b@        String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFMerger "@b@                + "<inputfiles 2..n> <outputfile>\n"@b@                + "\nOptions:\n"@b@                + "  <inputfiles 2..n> : 2 or more source PDF documents to merge\n"@b@                + "  <outputfile>      : The PDF document 
to save the merged documents to\n";@b@        @b@        System.err.println(message);@b@        System.exit(1);@b@    }@b@}