Java使用aspose批量将PDF转为word

导读:本篇文章讲解 Java使用aspose批量将PDF转为word,希望对大家有帮助,欢迎收藏,转发!站点地址:www.bmabk.com

最近有几个G的学习资料,全部是PDF,没办法编辑,所以想转成word。但是搜了很多软件,都没有批量转换功能,只能一个一个处理,太浪费时间,而且基本全部收费。所以决定自己写一个,还能节省时间。

需要注意的事项:jar包必须破解,如果不是破解版每个文档只能转换4页。

怎么破解jar包网上有教程感兴趣的可以自己破解一下。我就不写了。

jar包资源:aspose-pdf.zip-互联网文档类资源-CSDN下载

1.将需要的aspose.pdf.jar包引入项目

2.封装读取文件夹里面PDF文件的工具类

package com.question.syncdemo.utils;

import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.util.StrUtil;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * description: FileUtils  文件操作工具类<br>
 *
 * @date: 2020/11/17 0017 下午 5:06 <br>
 * @author: William <br>
 * version: 1.0 <br>
 */
public class FileUtils {



    //因为我这个是临时用所以没有考虑并发,如果并发自己修改一下就好了
    public static List<String> resultList = new ArrayList<>();


    /**
     *@description: 通过文件路径,修改该路径下所有文件的名字
     * @param path  文件夹路径
     * @return:
     * @author: William
     * @date 2019/8/8 14:52
     */
    public static List<String> getFilesPaths(String path,List<String> stringList){
        File file = new File(path);
        if(file.exists()){
            File[] files = file.listFiles();
            if (null == files || files.length == 0) {
                System.out.println("文件夹是空的!");
            } else {
                for (File file2 : files) {
                    if (file2.isDirectory()) {
                        getFilesPaths(file2.getAbsolutePath(),stringList);
                    } else {
                        String filePath = file2.getAbsolutePath();
                        stringList.add(filePath);
                    }
                }
            }
        }else{
            System.out.println("该路径不存在");
        }
        return stringList;
    }

}

3.封装PDF处理工具类

package com.question.syncdemo.utils;

import com.aspose.pdf.Document;
import com.aspose.pdf.License;
import com.aspose.pdf.SaveFormat;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicReference;

/**
 * description: PDFUtil <br>
 *
 * @date: 2021/2/4 0004 上午 10:09 <br>
 * @author: William <br>
 * version: 1.0 <br>
 */
public class PDFUtil {
    private static InputStream license;


    public static void main(String[] args) throws Exception {
        pdf2word();
    }




    //多线程处理需要转换格式的文件
    public static void produceData(List<String> list) throws InterruptedException {
        //每个线程处理的数据,我这里只开了三个线程,
        int threadSize = list.size()/3;
        //int threadSize = 500;  可以每个线程处理500条数据
        int remainder = list.size() % threadSize;
        //线程数
        int threadNum = 0;
        if (remainder == 0) {
            threadNum = list.size() / threadSize;
        } else {
            threadNum = list.size() / threadSize + 1;
        }
        long begin = System.currentTimeMillis();
        //创建一个线程池
        ExecutorService eService = Executors.newFixedThreadPool(threadNum);
        List<Callable<String>> cList = new ArrayList<>();
        Callable<String> task = null;
        List<String> sList = null;
        for (int i = 0; i < threadNum; i++) {
            if (i == threadNum - 1) {
                sList = list.subList(i * threadSize, list.size());
            } else {
                sList = list.subList(i * threadSize, (i + 1) * threadSize);
            }
            final List<String> nowList = sList;
            task = new Callable<String>() {
                @Override
                public String call() throws Exception {
                    nowList.forEach(filesPath -> {
                        if(filesPath.contains(".pdf")){
                            File file = new File(filesPath);
                            String paperName = file.getName();
                            paperName = paperName.substring(0,paperName.lastIndexOf("."));
                            String tempFilesPath = filesPath.substring(0,filesPath.lastIndexOf(File.separator));
                            tempFilesPath = tempFilesPath +"\\"+paperName+".docx";
                            System.out.println(tempFilesPath);
                            try {
                                saveAsWord(filesPath,tempFilesPath);
                            } catch (Exception e) {
                                e.printStackTrace();
                            }
                        }
                    });
                    return "ok";
                }
            };
            cList.add(task);
        }
        List<Future<String>> results = eService.invokeAll(cList);
        for (Future<String> str : results) {
            //System.out.println(str.get());
        }
        eService.shutdown();
        long end = System.currentTimeMillis();
        System.out.println("执行耗时:" + (end - begin));
    }



    public static void pdf2word() throws Exception {
        List<String> strings = new ArrayList<>();
        List<String> filesPaths = FileUtils.getFilesPaths("D:\\work\\temp\\中学学段2019科目二", strings);
        produceData(filesPaths);
    }



    //将PDF保存为word
    public static void saveAsWord(String targetFile,String newFile) throws Exception {
        File target = new File(targetFile);
        if(!target.exists()){
            target.mkdirs();
        }
        FileInputStream targetInputStream = new FileInputStream(target);
        //调用去水印的方法 读取license.xml文件
        if (!getLicense()) {
            System.out.println("获取验证失败");
        }
        Document targetDocument = new Document(targetInputStream);
        targetDocument.save(newFile, SaveFormat.DocX);
        targetInputStream.close();
        targetDocument.close();
    }





     //证书获取
    public static synchronized boolean getLicense() {
        boolean result = false;
        try {

            String license2 = "<License>\n"
                    + "  <Data>\n"
                    + "    <Products>\n"
                    + "      <Product>Aspose.Total for Java</Product>\n"
                    + "      <Product>Aspose.Words for Java</Product>\n"
                    + "    </Products>\n"
                    + "    <EditionType>Enterprise</EditionType>\n"
                    + "    <SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
                    + "    <LicenseExpiry>20991231</LicenseExpiry>\n"
                    + "    <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>\n"
                    + "  </Data>\n"
                    + "  <Signature>111</Signature>\n"
                    + "</License>";
            license = new ByteArrayInputStream(license2.getBytes("UTF-8"));

            License aposeLic = new License();
            aposeLic.setLicense(license);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

}

如果没有币的直接加我微信获取就好了

Java使用aspose批量将PDF转为word

 

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由半码博客整理,本文链接:https://www.bmabk.com/index.php/post/97029.html

(0)
小半的头像小半

相关推荐

半码博客——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!