切割大文本

得意时要看淡,失意时要看开。不论得意失意,切莫大意;不论成功失败,切莫止步。志得意满时,需要的是淡然,给自己留一条退路;失意落魄时,需要的是泰然,给自己觅一条出路。

《切割大文本》一文希望对大家有帮助,欢迎收藏、转发!站点地址:www.bmabk.com,来源:原文

# -*- coding: UTF-8 -*-
"""
@Time    : 2022/8/10 13:00
@Author  : JiaoHiacheng
@Email   : JHC000abc@gmail.com
@Project : Python
@File    : split_huge_txt_pro.py
@des     : 大文本按行拆分,不符合拆分条件(少于预设拆分行数),不进行拆分

"""
import os
from datetime import datetime


def check_exists(file):
    """Make sure the directory ``file`` exists.

    Args:
        file: Path of the directory to create. Missing parent directories
            are created as well.

    If anything already exists at ``file`` the function returns without
    touching the filesystem.
    """
    if os.path.exists(file):
        return
    os.makedirs(file)


def remove_empty_file(file, split_txt):
    """Delete one generated part file and then its containing directory.

    Args:
        file: Directory to remove once ``split_txt`` is gone. ``os.rmdir``
            only removes empty directories, so an ``OSError`` is raised if
            anything else is still inside.
        split_txt: Path of the file to delete first.
    """
    # os.unlink is the same operation as os.remove.
    os.unlink(split_txt)
    os.rmdir(file)


def _write_part(out_dir, part_name, lines):
    """Write ``lines`` to ``out_dir/part_name``, creating ``out_dir`` if needed."""
    os.makedirs(out_dir, exist_ok=True)
    # "w" rather than "a": re-running the split must not append to, and thus
    # duplicate the content of, parts left over from a previous run.
    with open(os.path.join(out_dir, part_name), "w", encoding="utf-8") as f_out:
        f_out.writelines(lines)


def split(in_file, out_file, constraint_num):
    """Split a UTF-8 text file into numbered parts of ``constraint_num`` lines.

    Parts are written to the directory ``out_file`` and are named
    ``target_<n>_<basename of in_file>`` with ``n`` starting at 1. Every part
    holds exactly ``constraint_num`` lines except possibly the last one, which
    holds the remaining lines. No empty trailing part is produced when the
    line count is an exact multiple of ``constraint_num``.

    If the input has fewer than ``constraint_num`` lines (or
    ``constraint_num`` is not positive, so a chunk can never fill up), the
    file is not split: nothing is written and the output directory is not
    created. The output directory is only created when the first part is
    written.

    Progress information is printed to stdout.

    Args:
        in_file: Path of the text file to split; read as UTF-8.
        out_file: Directory that receives the part files.
        constraint_num: Number of lines per part.

    Returns:
        None.
    """
    print("目标文件:{}\n拆分后输出位置:{}".format(in_file, out_file))
    print("开始拆分:{}".format(datetime.now()))
    in_name = os.path.basename(in_file)
    num = 0        # total number of lines read
    name_num = 0   # number of part files written so far
    pending = []   # lines collected for the part currently being filled

    # The context manager closes the input even if writing a part fails.
    with open(in_file, "r", encoding="utf-8") as f_in:
        for line in f_in:
            num += 1
            pending.append(line)
            if len(pending) == constraint_num:
                name_num += 1
                _write_part(
                    out_file, "target_{}_{}".format(name_num, in_name), pending)
                # Reset only after the whole chunk has been written.
                pending = []

    # Leftover lines become a final, shorter part, but only when at least one
    # full part exists; otherwise the input is below the split threshold.
    if name_num and pending:
        name_num += 1
        _write_part(out_file, "target_{}_{}".format(name_num, in_name), pending)

    print("文件总行数:{}\n拆分文件数:{}".format(num, name_num))

    if name_num == 0:
        # Nothing was written, so there is nothing to clean up.
        print("文件未达到拆分行数,已删除拆分的文件")

    print("拆分结束:{}".format(datetime.now()))


if __name__ == "__main__":
    # Example run: the input file, the output directory and the number of
    # lines per part are hard-coded below.
    in_file, out_file, constraint_num = (
        R"D:\Desktop\test\zh_jp_rq.txt",
        R"D:\Desktop\test\split_res",
        1000,
    )
    split(in_file, out_file, constraint_num)

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由极客之家整理,本文链接:https://www.bmabk.com/index.php/post/156912.html

(0)
飞熊的头像飞熊bm

相关推荐

发表回复

登录后才能评论
极客之家——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!