# -*- coding: UTF-8 -*-
"""
@Time : 2022/8/10 13:00
@Author : JiaoHiacheng
@Email : JHC000abc@gmail.com
@Project : Python
@File : split_huge_txt_pro.py
@des : 大文本按行拆分,不符合拆分条件(少于预设拆分行数),不进行拆分
"""
import os
from datetime import datetime
def check_exists(file):
    """Ensure the directory ``file`` exists, creating it and any parents if missing.

    Args:
        file: Path of the directory to guarantee. Nothing is done when a
            filesystem entry of any kind already exists at this path.

    Raises:
        OSError: If the directory cannot be created.
    """
    if not os.path.exists(file):
        # exist_ok tolerates the directory being created by someone else
        # between the existence check above and this call.
        os.makedirs(file, exist_ok=True)
def remove_empty_file(file, split_txt):
    """Delete the file ``split_txt`` and then remove the directory ``file``.

    Args:
        file: Directory to remove. ``os.rmdir`` only removes empty
            directories, so it must hold nothing once ``split_txt`` is gone.
        split_txt: Path of the file to delete first.

    Raises:
        OSError: If ``split_txt`` cannot be deleted, or if ``file`` is not
            empty or cannot be removed.
    """
    os.remove(split_txt)
    os.rmdir(file)
def _write_chunk(out_dir, index, in_name, lines):
    """Write ``lines`` to ``target_<index>_<in_name>`` in ``out_dir``; return its path."""
    chunk_path = os.path.join(out_dir, "target_{}_{}".format(index, in_name))
    # "w" rather than "a": a re-run must replace earlier output, not append
    # to it and duplicate every line.
    with open(chunk_path, "w", encoding="utf-8") as f_out:
        f_out.writelines(lines)
    return chunk_path


def split(in_file, out_file, constraint_num):
    """Split a UTF-8 text file into numbered files of ``constraint_num`` lines.

    Chunks are written into the directory ``out_file`` as
    ``target_<n>_<input file name>``, numbered from 1; the final chunk holds
    whatever lines remain. Progress and a summary are printed to stdout.

    If the input never fills a single chunk (it has fewer than
    ``constraint_num`` lines, or ``constraint_num`` is not positive so the
    limit can never be reached), no split is kept: the lone partial chunk is
    deleted and the output directory is removed when it is empty.

    Args:
        in_file: Path of the text file to split (read as UTF-8).
        out_file: Directory that receives the chunk files; created if missing.
        constraint_num: Number of lines per chunk file.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    print("目标文件:{}\n拆分后输出位置:{}".format(in_file, out_file))
    print("开始拆分:{}".format(datetime.now()))
    in_name = os.path.basename(in_file)
    num = 0        # total lines read
    name_num = 0   # chunk files written so far
    pending = []   # lines collected for the chunk currently being filled
    with open(in_file, "r", encoding="utf-8") as f_in:
        # Create the output directory only once the input opened successfully,
        # so a bad input path does not leave a stray directory behind.
        os.makedirs(out_file, exist_ok=True)
        for line in f_in:
            num += 1
            pending.append(line)
            if len(pending) == constraint_num:
                name_num += 1
                _write_chunk(out_file, name_num, in_name, pending)
                pending.clear()
    reached_limit = name_num > 0
    leftover_path = None
    # Only write a trailing chunk when lines are actually left over; an
    # input whose length is an exact multiple of the limit must not produce
    # an extra empty file.
    if pending:
        name_num += 1
        leftover_path = _write_chunk(out_file, name_num, in_name, pending)
    print("文件总行数:{}\n拆分文件数:{}".format(num, name_num))
    if not reached_limit:
        # The input never filled one chunk: undo the partial output.
        if leftover_path is not None:
            os.remove(leftover_path)
        # Leave the directory in place if it holds unrelated files.
        if not os.listdir(out_file):
            os.rmdir(out_file)
        print("文件未达到拆分行数,已删除拆分的文件")
    print("拆分结束:{}".format(datetime.now()))
if __name__ == '__main__':
    # Example invocation: split the sample file into 1000-line pieces.
    # Edit the paths and the line limit below before running the script.
    in_file = R"D:\Desktop\test\zh_jp_rq.txt"
    out_file = R"D:\Desktop\test\split_res"
    constraint_num = 1000
    split(in_file, out_file, constraint_num)
# 版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件至 举报,一经查实,本站将立刻删除。
# 文章由极客之家整理,本文链接:https://www.bmabk.com/index.php/post/156912.html