40天训练-第4天-4-threading、multiprocessing、gevent、multiprocessingpool、threadingpool、gevent pool相互嵌套使用的结果与出现的情况

  • by

没想到都 2020 年了,前几篇文章末尾的落款日期还是下意识写成了 2019,时光真是抓不住呀。

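这里只记录三种 Pool 的两两嵌套。按排列组合算有 3*2=6 种,实际一般只用 3 种:M-T(multiprocessing 内嵌 threading)、M-G(multiprocessing 内嵌 gevent)、T-G(threading 内嵌 gevent)。其中 M-G 存在冲突无法运行,所以最后只剩两种可运行的代码。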

下面分别以"函数调用 30*150 次"、"请求 www.baidu.com 30*150 次"、"DNS 解析 a.baidu.com 30*150 次"三种任务为例编写代码,并比较耗时。

1.multiprocessing_pool-threading_pool

耗时:

import time
import multiprocessing
from multiprocessing import Pool as multi_pool
import threadpool
import queue

import sys, os

sys.path.append(os.path.abspath('../../libs'))
from common import threadPoolManager

# ------------------------------
import requests
import dns.resolver

# Benchmark fixtures. Every list repeats a single value, so each task performs
# identical work and only the scheduling strategy differs between runs.

# 30 base URLs; each one is later combined with every entry of `catelogs`.
test_url = "http://www.baidu.com"
urls = [test_url] * 30

# 150 URL path suffixes (identifier spelling kept: other code refers to `catelogs`).
catelog = "/a"
catelogs = [catelog] * 150

# 30 parent-domain suffixes; each one is later prefixed with every entry of
# `domain_dicts`.
domain = ".baidu.com"
domains = [domain] * 30

# 150 sub-domain labels. The original listing also kept a commented-out
# 70000-entry variant of this list.
domain_dict = "a"
domain_dicts = [domain_dict] * 150

def test(domain):
    """Print ``domain`` to stdout; the benchmark task that only calls a function."""
    print(domain)

def request_url(target):
    """Send an HTTP GET to ``target`` and print it unless the status is 404.

    This is a best-effort benchmark task: a failed request is ignored so that
    it does not interrupt the remaining work.

    Args:
        target: URL passed unchanged to ``requests.get``.
    """
    try:
        response = requests.get(target)
    except requests.RequestException:
        # Suppress request-level failures only. A bare ``except`` would also
        # hide KeyboardInterrupt/SystemExit and genuine programming errors.
        return
    if response.status_code != 404:
        print(target)

# Query the DNS servers to check whether a domain name exists.
def domain_query(domain=None, dns_servers=None):
    """Resolve ``domain`` and print ``<domain>:<ip>, <ip>, ...`` when it has addresses.

    Lookups go through the module-level ``resolver``. Failed lookups are
    ignored because a missing name is an expected outcome of the benchmark.

    Args:
        domain: Name to resolve. Nothing is done when it is empty or ``None``.
        dns_servers: Unused; kept so existing callers keep working.
    """
    if not domain:
        return
    try:
        # NOTE(review): dnspython 2.x deprecates ``Resolver.query`` in favour
        # of ``Resolver.resolve`` - confirm the installed version before switching.
        answer = resolver.query(domain)
    except dns.exception.DNSException:
        # Covers NXDOMAIN, timeouts and similar resolver errors without hiding
        # KeyboardInterrupt/SystemExit the way a bare ``except`` does.
        return
    if not answer:
        return
    addresses = sorted(record.address for record in answer)
    # Compare whole addresses: a substring test on the joined text would also
    # reject legitimate results such as "10.0.0.1".
    if "0.0.0.1" in addresses:
        return
    print(domain + ":" + ", ".join(addresses))

# Shared resolver configuration read by domain_query().
dns_servers = [
    '114.114.114.114',
    '8.8.8.8',
    '223.5.5.5',
    '223.6.6.6',
    '119.29.29.29',
    '182.254.116.116',
]
resolver = dns.resolver.Resolver()
# The same 6-second value is used for both timeout attributes.
resolver.lifetime = 6.0
resolver.timeout = 6.0
resolver.nameservers = dns_servers  # default: ['114.114.114.114', '8.8.8.8']
# print(resolver.nameservers)

# ------------------------------



def run_threadpool_test(domain):
    """Run ``test`` on ``domain`` joined with every entry of ``catelogs``.

    The targets are handed to ``threadPoolManager`` with ``threadNum=30`` and
    the call returns after ``waitAllComplete()``.
    """
    # Alternative kept from the original listing, based on the `threadpool` package:
    # q = queue.Queue()
    # for catelog in catelogs:
    #     q.put(domain + catelog)
    # lst = [q.get() for i in range(q.qsize())]
    # thread_pool = threadpool.ThreadPool(30)
    # reqs = threadpool.makeRequests(test, lst)
    # [thread_pool.putRequest(req) for req in reqs]  # submit every request to the pool
    # thread_pool.wait()  # block until all requests have finished
    targets = [domain + suffix for suffix in catelogs]
    workers = threadPoolManager(targets, my_function=test, threadNum=30)
    workers.waitAllComplete()

def run_threadpool_request(domain):
    """Run ``request_url`` on ``domain`` joined with every entry of ``catelogs``.

    The targets are handed to ``threadPoolManager`` with ``threadNum=30`` and
    the call returns after ``waitAllComplete()``.
    """
    targets = [domain + suffix for suffix in catelogs]
    workers = threadPoolManager(targets, my_function=request_url, threadNum=30)
    workers.waitAllComplete()
def run_threadpool_subdomain(domain):
    """Run ``domain_query`` on every ``domain_dicts`` label prefixed to ``domain``.

    The candidate names are handed to ``threadPoolManager`` with
    ``threadNum=30`` and the call returns after ``waitAllComplete()``.
    """
    candidates = [label + domain for label in domain_dicts]
    workers = threadPoolManager(candidates, my_function=domain_query, threadNum=30)
    workers.waitAllComplete()

def _time_process_pool(worker, items, processes=5):
    """Map ``worker`` over ``items`` in a process pool and return the elapsed seconds.

    Args:
        worker: Callable executed once per item inside a worker process.
        items: Iterable of arguments, one per task.
        processes: Number of worker processes to start.

    Returns:
        Wall-clock seconds from just before the pool is created until it has
        been closed and joined.
    """
    start_time = time.time()
    process_pool = multi_pool(processes)
    try:
        process_pool.map(worker, items)
    finally:
        # Always release the worker processes, even when a task raised.
        process_pool.close()
        process_pool.join()
    return time.time() - start_time


if __name__ == '__main__':
    # Outer layer: 5 worker processes. Inner layer: the run_threadpool_*
    # functions, each of which uses threadPoolManager with threadNum=30.
    #
    # Timings noted in the original listing (seconds):
    #   1. run_threadpool_test      over urls:      2.4399943351745605
    #   2. run_threadpool_request   over urls:    163.1308376789093
    #   3. run_threadpool_subdomain over domains: 120.17293190956116
    #
    # Exactly one benchmark must stay enabled: an ``if`` body made only of
    # comments is a syntax error.

    # multiprocessing.freeze_support()
    # print(_time_process_pool(run_threadpool_test, urls))
    # print(_time_process_pool(run_threadpool_request, urls))
    print(_time_process_pool(run_threadpool_subdomain, domains))

2.multiprocessing_pool-gevent_pool

from multiprocessing import Pool as multi_pool
import gevent
from gevent import monkey, pool as gevent_pool
monkey.patch_all()

这样写程序会直接挂起,任务不会执行。推测原因是 monkey.patch_all() 替换了线程、socket 等阻塞式标准库接口,与 multiprocessing.Pool 内部依赖的线程和管道通信发生冲突(未进一步验证)。

3.threading_pool-gevent_pool

import gevent
from gevent import monkey, pool

monkey.patch_all()
import time
import threadpool
import queue

import sys, os

sys.path.append(os.path.abspath('../../libs'))
from common import threadPoolManager

# ------------------------------
import requests
import dns.resolver

# Benchmark fixtures. Every list repeats a single value, so each task performs
# identical work and only the scheduling strategy differs between runs.

# 30 base URLs; each one is later combined with every entry of `catelogs`.
test_url = "http://www.baidu.com"
urls = [test_url] * 30

# 150 URL path suffixes (identifier spelling kept: other code refers to `catelogs`).
catelog = "/a"
catelogs = [catelog] * 150

# 30 parent-domain suffixes; each one is later prefixed with every entry of
# `domain_dicts`.
domain = ".baidu.com"
domains = [domain] * 30

# 150 sub-domain labels. The original listing also kept a commented-out
# 70000-entry variant of this list.
domain_dict = "a"
domain_dicts = [domain_dict] * 150

def test(domain):
    """Print ``domain`` to stdout; the benchmark task that only calls a function."""
    print(domain)

def request_url(target):
    """Send an HTTP GET to ``target`` and print it unless the status is 404.

    This is a best-effort benchmark task: a failed request is ignored so that
    it does not interrupt the remaining work.

    Args:
        target: URL passed unchanged to ``requests.get``.
    """
    try:
        response = requests.get(target)
    except requests.RequestException:
        # Suppress request-level failures only. A bare ``except`` would also
        # hide KeyboardInterrupt/SystemExit and genuine programming errors.
        return
    if response.status_code != 404:
        print(target)

# Query the DNS servers to check whether a domain name exists.
def domain_query(domain=None, dns_servers=None):
    """Resolve ``domain`` and print ``<domain>:<ip>, <ip>, ...`` when it has addresses.

    Lookups go through the module-level ``resolver``. Failed lookups are
    ignored because a missing name is an expected outcome of the benchmark.

    Args:
        domain: Name to resolve. Nothing is done when it is empty or ``None``.
        dns_servers: Unused; kept so existing callers keep working.
    """
    if not domain:
        return
    try:
        # NOTE(review): dnspython 2.x deprecates ``Resolver.query`` in favour
        # of ``Resolver.resolve`` - confirm the installed version before switching.
        answer = resolver.query(domain)
    except dns.exception.DNSException:
        # Covers NXDOMAIN, timeouts and similar resolver errors without hiding
        # KeyboardInterrupt/SystemExit the way a bare ``except`` does.
        return
    if not answer:
        return
    addresses = sorted(record.address for record in answer)
    # Compare whole addresses: a substring test on the joined text would also
    # reject legitimate results such as "10.0.0.1".
    if "0.0.0.1" in addresses:
        return
    print(domain + ":" + ", ".join(addresses))

# Shared resolver configuration read by domain_query().
dns_servers = [
    '114.114.114.114',
    '8.8.8.8',
    '223.5.5.5',
    '223.6.6.6',
    '119.29.29.29',
    '182.254.116.116',
]
resolver = dns.resolver.Resolver()
# The same 6-second value is used for both timeout attributes.
resolver.lifetime = 6.0
resolver.timeout = 6.0
resolver.nameservers = dns_servers  # default: ['114.114.114.114', '8.8.8.8']
# print(resolver.nameservers)

# ------------------------------


def run_threadpool_test(domain, pool_size=30):
    """Run ``test`` on ``domain`` joined with every entry of ``catelogs`` using greenlets.

    This is the per-thread task of the thread-pool/gevent-pool benchmark. The
    original listing had every execution variant commented out, so the
    function built its work list and then did nothing; the gevent-pool variant
    is enabled here.

    Args:
        domain: Prefix combined with each entry of ``catelogs``.
        pool_size: Maximum number of greenlets running at once.
    """
    targets = [domain + suffix for suffix in catelogs]
    # Timings noted in the original listing (seconds):
    #   one gevent.spawn per target: 7.228254318237305
    #   gevent.pool.Pool(30):        11.661890029907227
    greenlet_pool = gevent.pool.Pool(pool_size)
    greenlets = [greenlet_pool.spawn(test, target) for target in targets]
    gevent.joinall(greenlets)

def run_threadpool_request(domain, pool_size=30):
    """Run ``request_url`` on ``domain`` joined with every entry of ``catelogs`` using greenlets.

    This is the per-thread task of the thread-pool/gevent-pool benchmark. The
    original listing had every execution variant commented out, so the
    function built its work list and then did nothing; the gevent-pool variant
    is enabled here.

    Args:
        domain: URL prefix combined with each entry of ``catelogs``.
        pool_size: Maximum number of greenlets running at once.
    """
    targets = [domain + suffix for suffix in catelogs]
    # Timings noted in the original listing (seconds):
    #   sequential loop:             626.7558057308197
    #   one gevent.spawn per target: 324.32001781463623
    #   gevent.pool.Pool(30):        325.25664258003235
    greenlet_pool = gevent.pool.Pool(pool_size)
    greenlets = [greenlet_pool.spawn(request_url, target) for target in targets]
    gevent.joinall(greenlets)
def run_threadpool_subdomain(domain, pool_size=30):
    """Run ``domain_query`` on every ``domain_dicts`` label prefixed to ``domain`` using greenlets.

    This is the per-thread task of the thread-pool/gevent-pool benchmark. The
    original listing had every execution variant commented out, so the
    function built its work list and then did nothing; the gevent-pool variant
    is enabled here.

    Args:
        domain: Parent-domain suffix appended to each entry of ``domain_dicts``.
        pool_size: Maximum number of greenlets running at once.
    """
    candidates = [label + domain for label in domain_dicts]
    # Timings noted in the original listing (seconds):
    #   sequential loop:             188.70322918891907
    #   one gevent.spawn per target: 40.130337715148926
    #   gevent.pool.Pool(30):        163.19447422027588
    greenlet_pool = gevent.pool.Pool(pool_size)
    greenlets = [greenlet_pool.spawn(domain_query, name) for name in candidates]
    gevent.joinall(greenlets)


if __name__ == '__main__':
    # Outer layer: threadPoolManager with threadNum=5. Each task is one of the
    # run_threadpool_* functions defined in this file.
    start_time = time.time()

    # Named `thread_pool` rather than `pool` so the `pool` module imported
    # from gevent is not rebound at module level.
    # Enable exactly one of the following workloads:
    # thread_pool = threadPoolManager(urls, my_function=run_threadpool_test, threadNum=5)
    # thread_pool = threadPoolManager(urls, my_function=run_threadpool_request, threadNum=5)
    thread_pool = threadPoolManager(domains, my_function=run_threadpool_subdomain, threadNum=5)
    thread_pool.waitAllComplete()

    # Elapsed wall-clock seconds for the selected workload.
    print(time.time() - start_time)

2020.1.14

标签:

发表评论

电子邮件地址不会被公开。 必填项已用*标注