Python requests：如果找到关键字，则将该网站从后续检查中移除

Question

如果在某个网站上找到了我的“关键字”，我想把该网站从后续的检查过程中移除，这样它就不会被重复检查。我怎样才能做到这一点？我还是个初学者，我已在下方附上了完整的脚本，谢谢。

如果在当前检查的网站上找到关键字“google”，我想从进一步检查中删除该网站。

    # Excerpt from connect(): when the fetched page body contains the keyword
    # "google", report the hit on the console and append it to log.txt.
    if "google" in r2.text:
            print (bcolors.OKGREEN + "Parameters Found : " +server+ "/" + para1 + "/" + para2 + bcolors.ENDC)
            # One comma-separated record per hit: server,para1,para2
            client = server + "," + para1 + "," + para2 + "\n"
            # Append mode, so records already in log.txt are kept.
            f = open('log.txt', 'a')
            f.write(client)
            f.close()

我的完整脚本

import os
import sys
from threading import Thread, BoundedSemaphore
from datetime import datetime
import optparse
import requests
import urllib3

# NOTE(review): presumably run so the Windows console interprets the ANSI
# color codes in bcolors — confirm. The command's exit status is ignored.
os.system("color")
# Silence urllib3 warnings; the requests in connect() are sent with verify=False.
requests.urllib3.disable_warnings()

# NOTE(review): connection_lock is created here but never acquired in the code
# shown, so maxConnections does not actually limit the number of threads.
maxConnections = 10
connection_lock = BoundedSemaphore(maxConnections)
# Time of day at startup; generate_tests() prints it as "Time Started".
time = datetime.now().time()

class bcolors:
    """ANSI SGR escape sequences used to color console output.

    Prefix text with one or more of these codes and append ``ENDC`` to
    restore the terminal's default rendition.
    """

    # Every code must start with the ESC control character (octal 033)
    # followed by "["; without that prefix the terminal prints the code as
    # literal text instead of interpreting it.
    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    BOLD = '\033[1m'        # bold / increased intensity
    UNDERLINE = '\033[4m'   # underline
    ENDC = '\033[0m'        # reset all attributes


def connect(server, para1, para2):
    """Probe ``server + para1 + para2`` and log the combination on a hit.

    A PUT and then a GET are sent to the same URL. When the GET response body
    contains the keyword ``"google"``, a "Parameters Found" line is printed
    and ``server,para1,para2`` is appended to ``log.txt``.

    Args:
        server: Base URL of the site to probe.
        para1: First string appended to ``server``.
        para2: Second string appended after ``para1``.

    Request errors are printed and never raised: this function is a thread
    target, so an escaping exception would only produce a traceback in the
    worker thread.
    """
    url = server + para1 + para2

    try:
        r = requests.request('put', url, timeout=30, verify=False, headers={'Content-Type':'application/octet-stream'})
        r.close()
    except Exception as e:
        # A failed PUT is reported but does not prevent the GET check.
        print(e)

    try:
        r2 = requests.request('get', url, verify=False, timeout=30, headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'})
        r2.close()
        body = r2.text
    except Exception as e:
        # Without this guard an unreachable host or a timeout on the GET
        # escaped the function; there is nothing to inspect, so stop here.
        print(e)
        return

    if "google" not in body:
        return

    print (bcolors.OKGREEN + "Parameters Found : " +server+ "/" + para1 + "/" + para2 + bcolors.ENDC)
    client = server + "," + para1 + "," + para2 + "\n"
    # The context manager closes the log even if the write fails.
    with open('log.txt', 'a') as f:
        f.write(client)


def generate_tests(hosts, paras1, paras2):
    """Start one ``connect`` thread for every (para1, para2, host) combination.

    Each entry has trailing/leading newline and carriage-return characters
    stripped before use. Before a thread is started, a status banner showing
    the target, the running attempt count and the start/current time is
    printed. Threads are started and not joined.

    Args:
        hosts: Iterable of host strings (typically raw lines from a file).
        paras1: Iterable of first-parameter strings.
        paras2: Iterable of second-parameter strings.
    """
    attempts = 0
    for raw_para1 in paras1:
        para1 = raw_para1.strip('\n\r')
        for raw_para2 in paras2:
            para2 = raw_para2.strip('\n\r')
            for raw_host in hosts:
                server = raw_host.strip('\n\r')
                attempts += 1

                rule = bcolors.OKGREEN + "=" * 60 + bcolors.ENDC
                key, val, end = bcolors.BOLD, bcolors.OKBLUE, bcolors.ENDC
                banner = (
                    rule,
                    f"{key}Website: {val}{server}{para1}{para2}{end}",
                    f"{key}Parameter1: {val}{para1}{end}",
                    f"{key}Parameter2: {val}{para2}{end}",
                    f"{key}Attempts: {val}{attempts!s}{end}",
                    f"{key}Time Started: {val}{time!s}{end}",
                    f"{key}Time now: {val}{datetime.now().time()!s}{end}",
                    rule,
                )
                for line in banner:
                    print(line)

                Thread(target=connect, args=(server, para1, para2)).start()

def read_test_files(hostsfile, paras1file, paras2file):
    """Read the three input files and run every combination of their lines.

    Args:
        hostsfile: Path of the file listing hosts, one per line.
        paras1file: Path of the file listing first parameters, one per line.
        paras2file: Path of the file listing second parameters, one per line.

    Raises:
        OSError: If any of the files cannot be opened or read.
    """
    loaded = []
    for path in (hostsfile, paras1file, paras2file):
        # Context manager so each handle is closed as soon as it is read.
        with open(path, 'r') as handle:
            loaded.append(handle.readlines())
    hosts, paras1, paras2 = loaded
    generate_tests(hosts, paras1, paras2)


def main():
    """Parse the command line and start the scan.

    The options -H (hosts file), -U (first-parameter file) and -P
    (second-parameter file) are all required. If any is missing, the usage
    string is printed and the process exits with status 0.
    """
    parser = optparse.OptionParser('usage python test.py -H <hosts file> -U <para1 file> -P <para2 file>')
    parser.add_option('-H', dest='hostsfile', help="specify host file to test")
    parser.add_option('-U', dest='paras1file', help="specify possible parameters1")
    parser.add_option('-P', dest='paras2file', help="specify possible parameters2")

    # Positional arguments are not used by this script.
    options, _ = parser.parse_args()

    if not (options.hostsfile and options.paras1file and options.paras2file):
        print (parser.usage)
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module for interactive use and is not always available.
        sys.exit(0)

    read_test_files(options.hostsfile, options.paras1file, options.paras2file)




# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Answer 1

可以创建一个白名单集合，用于保存校验通过的网站，每次启动线程前判断是否在白名单中，存在则跳过。

在下面的代码中，白名单集合是临时的。

如果希望白名单在程序重启后依然有效，可以将其保存到文件中：每次启动程序时读取该文件，每次结束时再写回。

import os
import sys
from threading import Thread, BoundedSemaphore
from datetime import datetime
import optparse
import requests
import urllib3

# NOTE(review): presumably run so the Windows console interprets the ANSI
# color codes in bcolors — confirm. The command's exit status is ignored.
os.system("color")
# Silence urllib3 warnings; the requests in connect() are sent with verify=False.
requests.urllib3.disable_warnings()

# NOTE(review): connection_lock is created here but never acquired in the code
# shown, so maxConnections does not actually limit anything.
maxConnections = 10
connection_lock = BoundedSemaphore(maxConnections)
# Time of day at startup; generate_tests() prints it as "Time Started".
time = datetime.now().time()

# Servers on which the keyword has already been found. connect() adds to it
# and generate_tests() skips its members. It lives in memory only, so it is
# empty again at every program start.
white_list = set()


class bcolors:
    """ANSI SGR escape sequences for colored terminal output.

    Put a code in front of the text and ``ENDC`` behind it to switch the
    terminal back to its default rendition.
    """

    # A valid sequence is ESC (octal 033) + "[" + parameters + "m". The ESC
    # prefix is required; without it the code is printed as plain text.
    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    BOLD = '\033[1m'        # bold / increased intensity
    UNDERLINE = '\033[4m'   # underline
    ENDC = '\033[0m'        # reset all attributes


def connect(server, para1, para2):
    """Probe ``server + para1 + para2``; on a hit, whitelist and log it.

    A PUT and then a GET are sent to the same URL. When the GET response body
    contains the keyword ``"google"``, ``server`` is added to the module-level
    ``white_list``, a "Parameters Found" line is printed and
    ``server,para1,para2`` is appended to ``log.txt``.

    Args:
        server: Base URL of the site to probe.
        para1: First string appended to ``server``.
        para2: Second string appended after ``para1``.

    Request errors are printed and never raised: this function is a thread
    target, so an escaping exception would only produce a traceback in the
    worker thread.
    """
    target = server + para1 + para2

    try:
        r = requests.request('put', target, timeout=30, verify=False,
                             headers={'Content-Type': 'application/octet-stream'})
        r.close()
    except Exception as e:
        # A failed PUT is reported but does not prevent the GET check.
        print(e)

    try:
        r2 = requests.request('get', target, verify=False, timeout=30, headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'})
        r2.close()
        page = r2.text
    except Exception as e:
        # Without this guard a timeout or connection error on the GET
        # escaped the function; with no page to inspect, give up on this URL.
        print(e)
        return

    if "google" not in page:
        return

    # Remember the server so that later combinations for it are skipped.
    white_list.add(server)
    print(bcolors.OKGREEN + "Parameters Found : " + server + "/" + para1 + "/" + para2 + bcolors.ENDC)
    client = server + "," + para1 + "," + para2 + "\n"
    # The context manager closes the log even if the write fails.
    with open('log.txt', 'a') as f:
        f.write(client)


def generate_tests(hosts, paras1, paras2):
    """Check every (para1, para2, host) combination, skipping whitelisted hosts.

    Entries are stripped of newline/carriage-return characters before use.
    A host found in ``white_list`` is skipped without printing anything or
    counting an attempt. Otherwise a status banner is printed and ``connect``
    is run in a thread that is joined immediately, so checks happen one at a
    time and a server whitelisted by ``connect`` is skipped from then on.

    Args:
        hosts: Iterable of host strings (typically raw lines from a file).
        paras1: Iterable of first-parameter strings.
        paras2: Iterable of second-parameter strings.
    """
    attempt_no = 0
    for line1 in paras1:
        para1 = line1.strip('\n\r')
        for line2 in paras2:
            para2 = line2.strip('\n\r')
            for host_line in hosts:
                # white_list holds stripped names, so compare the stripped form.
                server = host_line.strip('\n\r')
                if server in white_list:
                    continue

                attempt_no += 1
                divider = bcolors.OKGREEN + "=" * 60 + bcolors.ENDC
                details = (
                    ("Website: ", server + para1 + para2),
                    ("Parameter1: ", para1),
                    ("Parameter2: ", para2),
                    ("Attempts: ", str(attempt_no)),
                    ("Time Started: ", str(time)),
                    ("Time now: ", str(datetime.now().time())),
                )

                print(divider)
                for label, value in details:
                    print(bcolors.BOLD + label + bcolors.OKBLUE + value + bcolors.ENDC)
                print(divider)

                worker = Thread(target=connect, args=(server, para1, para2))
                worker.start()
                # Wait for the result so white_list is current for the next check.
                worker.join()


def read_test_files(hostsfile, paras1file, paras2file):
    """Read the three input files and hand their lines to ``generate_tests``.

    Args:
        hostsfile: Path of the file listing hosts, one per line.
        paras1file: Path of the file listing first parameters, one per line.
        paras2file: Path of the file listing second parameters, one per line.

    Raises:
        OSError: If any of the files cannot be opened or read.
    """
    # Each file is read inside a context manager so its handle is closed
    # right away instead of being left for the garbage collector.
    with open(hostsfile, 'r') as hosts_fh:
        hosts = hosts_fh.readlines()
    with open(paras1file, 'r') as paras1_fh:
        paras1 = paras1_fh.readlines()
    with open(paras2file, 'r') as paras2_fh:
        paras2 = paras2_fh.readlines()
    generate_tests(hosts, paras1, paras2)


def main():
    """Parse the command line and start the scan.

    The options -H (hosts file), -U (first-parameter file) and -P
    (second-parameter file) are all required. If any is missing, the usage
    string is printed and the process exits with status 0.
    """
    parser = optparse.OptionParser('usage python test.py -H <hosts file> -U <para1 file> -P <para2 file>')
    parser.add_option('-H', dest='hostsfile', help="specify host file to test")
    parser.add_option('-U', dest='paras1file', help="specify possible parameters1")
    parser.add_option('-P', dest='paras2file', help="specify possible parameters2")

    # Positional arguments are not used by this script.
    options, _ = parser.parse_args()

    if options.hostsfile and options.paras1file and options.paras2file:
        read_test_files(options.hostsfile, options.paras1file, options.paras2file)
    else:
        print(parser.usage)
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module for interactive use and is not always available.
        sys.exit(0)


# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Python requests：如果找到关键字，则将该网站从后续检查中移除

Python requests remove website from further checking if keyword found

python

python-requests