40-Day Training - Day 3-4 - Performance Comparison: multiprocessing Pool, threading Pool, and gevent Pool


While optimizing some code over the last couple of days, I ran into a long-standing blind spot of mine.

I had been reaching for gevent for everything, but an actual comparison showed that sometimes it does nothing for performance, or helps very little. A simple I/O versus non-I/O comparison makes the pattern clear: gevent pays off for I/O-bound operations such as file I/O or network I/O (e.g. requests calls), not for pure computation.
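To see the distinction concretely, here is a minimal sketch (names and timings are illustrative, not from the benchmarks below): gevent.sleep yields to other greenlets, so 100 concurrent 0.1-second sleeps finish in roughly 0.1 seconds total, while 100 CPU-bound loops never yield and simply run one after another.

import time
import gevent

def io_like(_):
    gevent.sleep(0.1)  # cooperative: yields so other greenlets can run

def cpu_like(_):
    sum(i * i for i in range(10 ** 5))  # never yields: greenlets run serially

for func in (io_like, cpu_like):
    start = time.time()
    gevent.joinall([gevent.spawn(func, i) for i in range(100)])
    print(func.__name__, time.time() - start)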

Sometimes gevent needs to be combined with a thread pool. As for processes, the cost of process switching and differences in machine performance make a fair comparison awkward, so I won't write that part up here.

Installing gevent for Python 3 on Kali Linux: sudo apt-get install python3-gevent (pip3 install gevent errors out).

A good reference: https://my.oschina.net/sukai/blog/649793

1. First, the time each approach takes to issue 1000 requests to www.baidu.com (the questionable part of the setup: the thread count is fixed at 30).

The results up front, for analysis:

threadpool:        18.209562301635742
gevent (Pool(30)): 11.11320948600769
plain loop:        123.77365732192993

import gevent
from gevent import monkey, pool

monkey.patch_all()  # patch the stdlib before importing requests so its sockets become cooperative
import time
import queue
import threadpool
import requests

# requests benchmark: threadpool vs gevent vs plain loop
# ------------------------------

test_url = "http://www.baidu.com"
urls = [test_url] * 1000  # 1000 identical request targets

def request_url(target):
    try:
        requests.get(target)
    except Exception:
        pass  # ignore failures; only elapsed time matters here

# 1: threadpool
start_time = time.time()
q = queue.Queue()
for url in urls:
    q.put(url)
lst = [q.get() for i in range(q.qsize())]  # round-trip through a Queue; equivalent to list(urls)
thread_pool = threadpool.ThreadPool(30)
reqs = threadpool.makeRequests(request_url, lst)
[thread_pool.putRequest(req) for req in reqs]  # submit every work item to the pool
thread_pool.wait()  # block until all work items have finished

end_time = time.time()
print(end_time - start_time)

# 2: gevent, unbounded spawn (no cap on concurrent greenlets)
start_time = time.time()
jobs = [gevent.spawn(request_url, url) for url in urls]
gevent.joinall(jobs)
end_time = time.time()
print(end_time - start_time)
# 10.902864933013916

# gevent with the pool capped at 30 greenlets
start_time = time.time()
map_pool = gevent.pool.Pool(30)
data = map_pool.map(request_url, urls)
end_time = time.time()
print(end_time - start_time)
# 11.11320948600769

# 3: plain serial loop
start_time = time.time()
for url in urls:
    request_url(url)
end_time = time.time()
print(end_time - start_time)

# This approach is useless as written: spawn(request_url(url)) calls
# request_url immediately and passes its return value (None) to spawn,
# so nothing actually runs inside the pool.
# The correct call is scan_pool.spawn(request_url, url).
# # 2
# start_time=time.time()
# scan_pool = pool.Pool(30)
# gevent_list = [scan_pool.spawn(request_url(url)) for url in urls]
# gevent.joinall(gevent_list)
#
# end_time=time.time()
# print(end_time-start_time)
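For the record, the pool-spawn variant does work once the callable and its argument are passed separately; a minimal sketch, reusing request_url and urls from above:

start_time = time.time()
scan_pool = pool.Pool(30)  # at most 30 greenlets running at once
gevent_list = [scan_pool.spawn(request_url, url) for url in urls]
gevent.joinall(gevent_list)  # wait for every greenlet to finish
print(time.time() - start_time)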



The reference article includes a hand-rolled thread-pool demo by another author that is faster still:

8.602479696273804

import requests
import time
import multiprocessing
import threading
import queue


def request_url(target):
    try:
        requests.get(target)
    except Exception:
        pass  # ignore failures; only elapsed time matters here

class threadPoolManager:
    def __init__(self, urls, workNum=10000, threadNum=20):  # workNum is unused; kept from the original demo
        self.workQueue = queue.Queue()
        self.threadPool = []
        self.__initWorkQueue(urls)
        self.__initThreadPool(threadNum)

    def __initWorkQueue(self, urls):
        for i in urls:
            self.workQueue.put((request_url, i))

    def __initThreadPool(self, threadNum):
        for i in range(threadNum):
            self.threadPool.append(work(self.workQueue))

    def waitAllComplete(self):
        for i in self.threadPool:
            if i.is_alive():  # isAlive() was removed in Python 3.9; use is_alive()
                i.join()


class work(threading.Thread):
    def __init__(self, workQueue):
        threading.Thread.__init__(self)
        self.workQueue = workQueue
        self.start()

    def run(self):
        while True:
            try:
                # qsize() checks race between threads, so rely on the Empty exception instead
                do, args = self.workQueue.get(block=False)
            except queue.Empty:
                break
            do(args)
            self.workQueue.task_done()


# leftover from the reference article (multiprocessing variant):
# urls = ['http://www.ustchacker.com'] * 10
# pool = multiprocessing.Pool(PoolNum)
# data = pool.map(download_requests, urls)
# pool.close()
# pool.join()
url="http://www.baidu.com"
urls=[]
[urls.append(url) for i in range(1000)]
start_time=time.time()
pool = threadPoolManager(urls, threadNum=30)
pool.waitAllComplete()
print(time.time()-start_time)
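As an aside, the standard library's concurrent.futures covers the same pattern with far less code; a minimal sketch, not timed as part of the comparison above:

from concurrent.futures import ThreadPoolExecutor

import requests

def request_url(target):
    try:
        requests.get(target)
    except Exception:
        pass  # ignore failures; only elapsed time matters

urls = ["http://www.baidu.com"] * 1000
# map() fans the URLs out over 30 worker threads; the with block
# waits for every call to finish before exiting
with ThreadPoolExecutor(max_workers=30) as executor:
    list(executor.map(request_url, urls))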

2. Now the time each approach takes to call a trivial no-op function 100,000 times.

This one is interesting: with no I/O to overlap, the plain loop wins by a huge margin, and gevent is slowest of all because greenlet spawning and switching are pure overhead here.

Results, labeled by the print order in the code below:

threadpool:             8.119859457015991
gevent (spawn):         72.54705858230591
gevent (Pool(30) map):  115.30850791931152
plain loop:             0.039998769760131836

import gevent
from gevent import monkey, pool

monkey.patch_all()
import time
import queue
import threadpool
import requests

# plain function-call benchmark (no I/O involved)
# ------------------------------

urls = list(range(100000))  # 100000 dummy arguments

def test(target):
    pass  # a no-op stand-in for real work

# 1: threadpool
start_time = time.time()
q = queue.Queue()
for url in urls:
    q.put(url)
lst = [q.get() for i in range(q.qsize())]  # round-trip through a Queue; equivalent to list(urls)
thread_pool = threadpool.ThreadPool(30)
reqs = threadpool.makeRequests(test, lst)
[thread_pool.putRequest(req) for req in reqs]  # submit every work item to the pool
thread_pool.wait()  # block until all work items have finished

end_time = time.time()
print(end_time - start_time)

# 2: gevent, unbounded spawn (no cap on concurrent greenlets)
start_time = time.time()
jobs = [gevent.spawn(test, url) for url in urls]
gevent.joinall(jobs)
end_time = time.time()
print(end_time - start_time)

# gevent with the pool capped at 30 greenlets
start_time = time.time()
map_pool = gevent.pool.Pool(30)
data = map_pool.map(test, urls)
end_time = time.time()
print(end_time - start_time)


# 3: plain serial loop
start_time = time.time()
for url in urls:
    test(url)
end_time = time.time()
print(end_time - start_time)
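The takeaway generalizes: under CPython's GIL, neither greenlets nor threads speed up CPU-bound work, which is where the multiprocessing pool from the title earns its place. A minimal sketch, assuming a toy CPU-bound function (cpu_work is illustrative, not from the runs above):

import multiprocessing
import time

def cpu_work(n):
    # toy CPU-bound task: sum of squares
    return sum(i * i for i in range(n))

if __name__ == "__main__":
    args = [100000] * 100
    start_time = time.time()
    with multiprocessing.Pool() as p:  # one worker process per CPU core by default
        p.map(cpu_work, args)
    print(time.time() - start_time)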


2019.1.13
