proxyscrape returns None #34

Open
VReunov opened this issue Aug 28, 2021 · 8 comments

Comments

@VReunov

VReunov commented Aug 28, 2021

>>> import proxyscrape
>>> collector = proxyscrape.create_collector('default', 'http')
>>> proxy = collector.get_proxy({'country': 'united states'})
>>> print(proxy)
None

Python 3.9.6
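
One quick way to narrow this down (a sketch; it only assumes the collector's refresh_proxies(force=True) and unfiltered get_proxy() calls behave as the README describes): force a refresh, then ask for a proxy with no filter at all.

>>> collector.refresh_proxies(force=True)                      # force a fresh scrape of the sources
>>> print(collector.get_proxy())                               # no filter at all
>>> print(collector.get_proxy({'country': 'united states'}))   # the original filtered call

If even the unfiltered call prints None, the sources returned nothing; if it prints a proxy, only the 'united states' filter is coming back empty.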

@akguthal

akguthal commented Sep 1, 2021

I'm also having this issue

@Mark7888

Mark7888 commented Sep 1, 2021

same here

@chikko80

Did you guys find a solution?

@yoarch

yoarch commented Nov 10, 2022

Still the same error.

@8xu

8xu commented Jul 7, 2023

Having the same issue. I think this project is not maintained anymore.

@ydeng11

ydeng11 commented Mar 7, 2024

Same issue. I think it is dead.
I forked it and my fork is working now: proxyscrape

@zero-stroke

zero-stroke commented Jun 23, 2024

> Same issue. I think it is dead. I forked it and my fork is working now: proxyscrape

Your version works, but I don't know if I'm missing something; almost no free proxies work :( even though I made a multithreaded script to mass-verify them:

"""
python version >= 3.11
"""
import concurrent.futures
import datetime
import json
import os
import subprocess
import time
from enum import auto, StrEnum
from threading import Lock

import proxyscrape
import requests
from proxyscrape import Proxy
from requests.exceptions import ProxyError, ReadTimeout, SSLError, ConnectionError as ConnectionError_

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0'
}


sleeptime = 1
ip_location_url = "https://www.iplocation.net/"
ipify_url = "https://api.ipify.org/?format=json"
timeout = 15

try:
    base_ip_address: str = requests.get(url=ipify_url, headers=headers, timeout=timeout).text
except requests.RequestException:
    print("Error getting base ip")
    base_ip_address = ""  # fallback so the ip-change check in validate_proxies always has a value to compare against


class ProxyType(StrEnum):
    http = auto()
    https = auto()
    socks4 = auto()
    socks5 = auto()


def get_api_proxies(proxy_type) -> list[str]:
    proxy_response = requests.get(
        f"https://api.proxyscrape.com/?request=displayproxies"
        f"&proxytype={proxy_type}"
        "&timeout=10000"
        "&country=all"
        "&ssl=all"
        "&anonymity=all")

    all_proxies: list[str] = proxy_response.text.split("\r\n")[:-1]

    for proxy in all_proxies:
        print(proxy)
    return all_proxies


def check_proxy(raw_proxy, proxy_type, selenium_check):
    print(f"\nProxy: {raw_proxy}")
    host, port = raw_proxy.split(":")  # 1.94.31.35:8888
    proxy = Proxy(host=host, port=port, code='us', country='', anonymous='T', type=proxy_type, source='')
    if validate_proxies(proxy, selenium_check):
        return proxy
    return None


def validate_proxies_multithreaded(all_proxies, proxy_type, selenium_check) -> list:
    good_proxies = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for proxy in executor.map(check_proxy, all_proxies, [proxy_type] * len(all_proxies),
                                  [selenium_check] * len(all_proxies)):
            if proxy is not None:
                good_proxies.append(proxy)
    return good_proxies


def test_proxies(selenium_check: bool, use_api: bool) -> list[Proxy]:
    """
        Quickly filter through list of proxies and find only the ones that have valid 200 response codes.
    Then verify that the ip address does indeed change.
    Then try with selenium.

    Example Proxy from proxyscrape collector:
    Proxy(host='1.2.7.9',port='32',code='us', country='iran', anonymous=T, type='https', source='sslproxies')

    Parameters
    ----------
    selenium_check : bool
        To check using selenium or not.
    use_api : bool


    Returns
    -------

    """
    good_proxies: list[Proxy] = []

    if use_api:
        proxy_type = ProxyType.socks4  # socks5 will only yield a handful
        all_proxies: list[str] = get_api_proxies(proxy_type)
        print(f"Looping through {len(all_proxies)} proxies")

        good_proxies = validate_proxies_multithreaded(all_proxies, proxy_type, selenium_check)

        # for raw_proxy in all_proxies:
        #     print(f"\nProxy: {raw_proxy}")
        #     host, port = raw_proxy.split(":")  # 1.94.31.35:8888
        #     proxy: Proxy = Proxy(host=host, port=port, code='us', country='', anonymous='T', type=proxy_type, source='')
        #     if validate_proxies(proxy, selenium_check):
        #         good_proxies.append(proxy)

        print(f"Percentage of good proxies: {len(good_proxies) / len(all_proxies)}")
    else:
        quality_proxy_types = (ProxyType.https, ProxyType.socks4, ProxyType.socks5)
        collector = proxyscrape.create_collector('default', quality_proxy_types)
        num_valid_proxies = 5
        max_workers = 25

        good_proxies_lock = Lock()

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = set()

            while len(good_proxies) < num_valid_proxies:
                while len(futures) < max_workers:
                    proxy = collector.get_proxy()
                    if proxy is None:  # get_proxy() can return None (the problem this issue is about)
                        raise RuntimeError("collector returned no proxies; the underlying sources may be empty")
                    print(f"\nProxy: {proxy}")
                    futures.add(executor.submit(validate_proxies, proxy, selenium_check))

                done, futures = concurrent.futures.wait(
                    futures,
                    return_when=concurrent.futures.FIRST_COMPLETED
                )

                for future in done:
                    result = future.result()
                    if result:
                        with good_proxies_lock:
                            good_proxies.append(result)

    date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")  # e.g. '2024-06-22_17-34'; no ':' so the name is valid on Windows
    filename = f"good_proxies_{date_time}.json"
    with open(filename, 'w') as f:
        json.dump([proxy._asdict() for proxy in good_proxies], f, indent=4)

    print(f"Number of good proxies: {len(good_proxies)}")
    return good_proxies


def validate_proxies(proxy: Proxy, selenium_check: bool) -> Proxy | None:
    proxies = {
        "http": f"{proxy.type}://{proxy.host}:{proxy.port}",
        "https": f"{proxy.type}://{proxy.host}:{proxy.port}"
    }

    # ----- Check with requests -----
    try:
        response: requests.Response = requests.get(ip_location_url, headers=headers, proxies=proxies, timeout=7)
    except ProxyError:
        print("Proxy error")
        return None
    except SSLError:
        print("SLLError")
        return None
    except ConnectionError_:
        print("ConnectionError")
        return None
    except ReadTimeout:
        print("ReadTimeout error")
        return None
    if response.status_code != 200:
        print("Site rejected proxy")
        return None
    else:
        print("PASSED ip location check")

    try:
        response = requests.get(ipify_url, headers=headers, proxies=proxies, timeout=timeout)
    except (ProxyError, SSLError, ConnectionError_, ReadTimeout):
        print("Proxy failed during ip change check")
        return None
    if response.text == base_ip_address:
        print("Didn't change ip")
        return None
    else:
        print("PASSED ip change check")

    # ----- Check with selenium -----
    if selenium_check:  # Likely not necessary, extra check if you want it and have the setup. 
        from data_agg_constants import adblock_path
        from selenium import webdriver
        from selenium.common import TimeoutException
        from selenium.webdriver.chrome.service import Service
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.wait import WebDriverWait

        import undetected_chromedriver as uc

        chrome_126_path = f"C:/Users/{os.getlogin()}/Downloads/chrome-win64 (2)/chrome-win64/chrome.exe"
        options = webdriver.ChromeOptions()
        options.binary_location = chrome_126_path
        options.add_extension(adblock_path)

        options.add_argument(f'--proxy-server=http://{proxy.host}:{proxy.port}')

        driver = uc.Chrome(service=Service(), options=options)  # noqa
        driver.set_page_load_timeout(timeout)

        try:
            driver.get(ip_location_url)
        except Exception:  # TimeoutException or any other load failure
            print(f"Loading failed with proxy {proxy.host}:{proxy.port}, trying a new proxy...")
            try:
                subprocess.run(["taskkill", "/F", "/IM", "chrome.exe"], check=True)
            except subprocess.CalledProcessError as err:
                if err.returncode == 128:  # ERROR_INVALID_HANDLE (0x80000003L)
                    print("Chrome.exe is not running.")
                else:
                    print(f"Error occurred: {err}")
            time.sleep(sleeptime)
            return None  # a failed load must not fall through to the success path below

        WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        print("PASSED: Page loaded successfully")
        time.sleep(40)
        driver.quit()

    return proxy  # without the selenium check, the proxy has already passed both request checks


def main():
    good_proxies = test_proxies(selenium_check=False, use_api=True)
    print(f"Good proxies: {good_proxies}")


if __name__ == '__main__':
    main()

Oh, and I couldn't open an issue on it for some reason, so I posted here lol
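
For reference, the good_proxies*.json file the script writes can be loaded back and used with requests like this (a sketch; it only assumes the host/port/type fields that _asdict() writes above, and the filename is a made-up example):

import json
import requests

# hypothetical example filename -- use whatever name test_proxies() printed
with open("good_proxies_2024-06-22_17-34.json") as f:
    saved = json.load(f)

p = saved[0]
proxies = {scheme: f"{p['type']}://{p['host']}:{p['port']}" for scheme in ("http", "https")}
print(requests.get("https://api.ipify.org/?format=json", proxies=proxies, timeout=15).text)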

@ydeng11

ydeng11 commented Jun 26, 2024

I think these sources are not working anymore; these free proxies are basically clickbait.
The websites all have the same structure, which leads me to think they come from the same source/owner.
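
For what it's worth, a source can be sanity-checked directly by hitting the same endpoint zero-stroke's script queries and counting what comes back (a sketch reusing that URL and parameters; an empty or tiny list would back up the "dead sources" theory):

import requests

url = ("https://api.proxyscrape.com/?request=displayproxies"
       "&proxytype=http&timeout=10000&country=all&ssl=all&anonymity=all")
listed = [p for p in requests.get(url, timeout=15).text.splitlines() if p.strip()]
print(f"{len(listed)} proxies listed")  # how many the source advertises, not how many actually work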
