import requests
import json
import time
import re
import os
import sys
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

# Credits banner shown at startup: author, contact details and a notice
# forbidding redistribution, decompilation, modification and resale.
print("ツール制作者:神瀬来未")
print("Twitter:@41707")
print("連絡先:KohnoseLami@gmail.com")
print("このツールは二次配布、デコンパイル、自作発言、プログラムの改変、転売は許可しません。")

# Number of worker threads later handed to ThreadPoolExecutor.  The prompt is
# repeated until a positive integer is entered: a non-numeric answer makes
# int() raise ValueError, and ThreadPoolExecutor rejects max_workers <= 0.
while True:
    try:
        thread = int(input('使用するスレッド数を数字で打ち込んでください: '))
    except ValueError:
        continue
    if thread > 0:
        break

print("\nそれでは処理を開始します。量によってはとても時間がかかるので気長にお待ちください\n")

# The URL list comes from the command line when exactly one argument is
# given; in every other case it defaults to list.txt.
args = sys.argv
if len(args) == 2:
    input_file = args[1]
else:
    input_file = "list.txt"

# Verify the file that will actually be read.  (Checking the literal
# "list.txt" regardless of the argument would reject a valid custom file and
# let a missing custom file through.)  With the default file the message is
# unchanged.
if not os.path.exists(input_file):
    print("\nエラー:{0}が存在しません".format(input_file))
    time.sleep(3)  # leave the message on screen before the window closes
    sys.exit(1)

# Phrases check_url looks for in a page body before it queries username
# availability.
keywords_list = [
    "Twitterアカウントが見つかりませんでした",
    "そのページは存在しません",
    "User not found.",
    "見付かりません",
]
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
headers = {'User-Agent': ua}

# Result accumulators filled by check_url and dumped to text files at the end
# of the run.
logs = []          # "<status>\t<url>" line per request / failure
error = []         # URLs whose request raised a connection error
timeout = []       # URLs whose request timed out
urls_404 = []      # URLs answered with HTTP 404
urls_504 = []      # URLs answered with HTTP 504
urls_200 = []      # URLs answered with HTTP 200
urls_429 = []      # URLs answered with HTTP 429
urls_accept = []   # usernames that were submitted to the availability API
Available = []     # "@username" entries the API reported as valid
APIError = []      # not written to by any code visible in this file

# Wall-clock start, used for the elapsed-time report at the end.
start = time.time()

# Removes everything before the last "/", the "/" itself and a "?..." tail.
# For a typical profile URL such as https://twitter.com/name?lang=ja this
# leaves "name".
_USERNAME_PATTERN = re.compile(r"(.*)(?=/)|/|(?=\?)(.*)")


def _page_text(markup):
    """Return the text of the page's <body>, or of the whole document if it has none."""
    html = BeautifulSoup(markup, "html.parser")
    body = html.find("body")
    # find() returns None when the tag is missing; fall back to the document
    # instead of failing on None.text.
    return (html if body is None else body).text


def _record_username_availability(username, request_timeout):
    """Ask the username_available endpoint about *username* and record the answer.

    A ``valid`` value of true prints ``@username`` and appends it to
    ``Available``; both true and false append the bare username to
    ``urls_accept``.  When no verdict is present and the reply text contains
    "rate limit", a notice is printed and the username is still appended to
    ``urls_accept``.  Any other reply records nothing.
    """
    r = requests.get(
        "https://api.twitter.com/i/users/username_available.json",
        params={"username": username},
        timeout=request_timeout,
    )

    # The reply may not be JSON, or may lack the "valid" key (presumably the
    # case for rate-limit replies); treat both as "no verdict" rather than
    # raising, so the rate-limit branch below is actually reachable.
    try:
        jsondata = json.loads(r.text)
    except ValueError:
        jsondata = None
    result = jsondata.get("valid") if isinstance(jsondata, dict) else None

    if result is True:
        print("@" + username)
        Available.append("@" + username)
        urls_accept.append(username)
    elif result is False:
        urls_accept.append(username)
    elif "rate limit" in r.text:
        print("API制限" + username)
        urls_accept.append(username)


def check_url(target_url, headers=headers, retry=3, request_timeout=10):
    """Fetch *target_url*, classify the response and probe username availability.

    The status code is logged to ``logs`` and the URL is appended to the
    matching ``urls_404`` / ``urls_504`` / ``urls_429`` / ``urls_200`` list.
    For 404 and 200 responses whose page text contains any phrase from
    ``keywords_list``, the username is extracted from the URL and looked up
    once through the availability endpoint.

    Args:
        target_url: URL to check.
        headers: HTTP headers sent with the page request.
        retry: Maximum number of attempts; timeouts and connection errors
            (of either request) trigger another attempt after a 10 s pause.
        request_timeout: Timeout in seconds passed to ``requests.get`` so a
            stalled server cannot block a worker thread indefinitely.

    Returns:
        None.  Results are recorded in the module-level lists.
    """
    for attempt in range(retry):
        try:
            req = requests.get(
                target_url,
                headers=headers,
                allow_redirects=False,
                timeout=request_timeout,
            )
            logs.append(str(req.status_code) + "\t" + target_url)

            status = req.status_code
            if status == 404:
                urls_404.append(target_url)
            elif status == 504:
                urls_504.append(target_url)
            elif status == 429:
                urls_429.append(target_url)
            elif status == 200:
                urls_200.append(target_url)

            if status in (200, 404):
                body = _page_text(req.text)
                if any(keyword in body for keyword in keywords_list):
                    # Keep target_url intact: it is still needed for logging
                    # and for a possible retry of the page request.
                    username = _USERNAME_PATTERN.sub("", target_url)
                    _record_username_availability(username, request_timeout)
            return

        except requests.exceptions.Timeout:
            # Covers connect and read timeouts.  `timeout` here is the
            # module-level result list, which is why the parameter is named
            # request_timeout.
            logs.append("TIMEOUT" + "\t" + target_url)
            timeout.append(target_url)

        except requests.exceptions.ConnectionError:
            logs.append("ERROR" + "\t" + target_url)
            error.append(target_url)

        except requests.exceptions.RequestException:
            # e.g. a malformed URL: retrying cannot help, so record it and
            # give up on this URL without taking down the whole run.
            logs.append("ERROR" + "\t" + target_url)
            error.append(target_url)
            return

        # Back off before the next attempt; pointless after the final one.
        if attempt < retry - 1:
            time.sleep(10)

# Load the targets, one URL per line.  Surrounding whitespace is removed and
# blank lines are skipped so they are not submitted as empty URLs.
with open(input_file) as f:
    urls = [line.strip() for line in f.read().splitlines() if line.strip()]


def _check_url_safely(url):
    """Run check_url(url), recording unexpected errors instead of propagating them.

    An exception escaping a worker would be re-raised by pool.map and abort
    the run before any result file is written, discarding all progress.
    """
    try:
        return check_url(url)
    except Exception as exc:  # worker boundary: report and carry on
        print("{0}\t{1!r}".format(url, exc), file=sys.stderr)
        logs.append("ERROR" + "\t" + url)
        error.append(url)
        return None


# Check every URL on the worker pool; `threads` receives check_url's return
# value for each URL, in input order.
with ThreadPoolExecutor(max_workers=thread) as pool:
    threads = list(pool.map(_check_url_safely, urls))

# Dump every accumulator to its report file, one entry per line.
for report_name, report_lines in (
    ("log.txt", logs),
    ("timeout.txt", timeout),
    ("error.txt", error),
    ("404.txt", urls_404),
    ("429.txt", urls_429),
    ("200.txt", urls_200),
    ("504.txt", urls_504),
    ("output.txt", urls_accept),
    ("Available.txt", Available),
):
    with open(report_name, "w") as f:
        f.write("\n".join(report_lines))

elapsed_time = time.time() - start

print("\nelapsed_time:{0}[sec]\n".format(elapsed_time))
print("全処理完了。お疲れさまでした。")
print("Enterを押すとウィンドウを閉じます。")
# Keep the console window open until the user presses Enter.
input()
