import time

def openTxt():
    start = time.clock()  # time.clock() was removed in Python 3.8; time.perf_counter() is the modern replacement
    with open("E:/a", 'rb') as f:
        for line in f:
            print(line)
    elapsed = (time.clock() - start)
    print("Time used:", elapsed)
How can I rewrite this program with multiprocessing to make it faster?
The txt file is a bit over 3 GB, and reading it takes a long time, so I'd like to improve the efficiency.
Thanks.
Split the work across multiple processes, with each process handling one patch_size. Cap each read at chunk_size so that a patch_size too large to fit in memory can still be read in pieces. If you want to print, remember to take the lock.
import multiprocessing

p_list = []
sample_log = 'sample.log'
# sizes in bytes
patch_size = 4
chunk_size = 1
# should be equal to file_size / patch_size
p_num = 5

def process_by_chunk(i, lock):
    # if you want the prints to interleave nicely, uncomment the lock and indent
    # with lock:
    with open(sample_log, 'r') as f:
        # jump to the start of this process's patch
        f.seek(i * patch_size)
        while True:
            chunk_data = f.read(chunk_size)
            print('index : {} read : {}'.format(i, chunk_data))
            # stop at end of file, or once this patch has been fully read
            if not chunk_data or (f.tell() >= (i + 1) * patch_size):
                break
    return

def main():
    # index = multiprocessing.Value('i', 0)
    lock = multiprocessing.Lock()
    for i in range(p_num):
        p = multiprocessing.Process(target=process_by_chunk, args=(i, lock))
        p_list.append(p)
        p.start()
    for p in p_list:
        p.join()

if __name__ == '__main__':
    main()
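The hard-coded p_num = 5 only lines up if it equals file_size / patch_size. A minimal sketch of deriving it from the actual file size (assuming the same sample.log and patch_size as above), rounding up so a trailing partial patch still gets a process:

import math
import os

patch_size = 4  # bytes per process, matching the example above

file_size = os.path.getsize('sample.log')
# round up so the last, possibly partial, patch is still covered
p_num = math.ceil(file_size / patch_size)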
Python 3, using a multiprocessing Pool. Don't run it inside an IDE; IDE support for multiprocessing is generally poor~
import multiprocessing as mp

def showtxt(line):
    print('pid:%s, txt:%s' % (mp.current_process().pid, line))

if __name__ == '__main__':
    pool = mp.Pool()
    with open("E:/a", 'r') as f:
        pool.map_async(showtxt, f)
        pool.close()
        pool.join()
    input('ok')
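One caveat with map_async on a 3 GB file: map/map_async materialize the whole iterable into a list before dispatching, so the entire file ends up in memory, and handing one line per task adds a lot of inter-process overhead. A minimal variation (same path and showtxt as above) that streams lines with imap_unordered and batches them via chunksize:

import multiprocessing as mp

def showtxt(line):
    print('pid:%s, txt:%s' % (mp.current_process().pid, line))

if __name__ == '__main__':
    with open("E:/a", 'r') as f, mp.Pool() as pool:
        # imap_unordered consumes the file lazily instead of building a list,
        # and chunksize batches lines to reduce IPC overhead per task
        for _ in pool.imap_unordered(showtxt, f, chunksize=1000):
            pass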