#!/usr/bin/env python ################################################################ # .___ __ _______ .___ # # __| _/____ _______| | __ ____ \ _ \ __| _/____ # # / __ |\__ \\_ __ \ |/ // ___\/ /_\ \ / __ |/ __ \ # # / /_/ | / __ \| | \/ <\ \___\ \_/ \/ /_/ \ ___/ # # \____ |(______/__| |__|_ \\_____>\_____ /\_____|\____\ # # \/ \/ \/ # # ___________ ______ _ __ # # _/ ___\_ __ \_/ __ \ \/ \/ / # # \ \___| | \/\ ___/\ / # # \___ >__| \___ >\/\_/ # # est.2007 \/ \/ forum.darkc0de.com # ################################################################ # # ProxyHarvest.py v1.1 # # linux ONLY # # REQUIREMENTS: # - GeoIP Database + GeoIP Python API # - sudo apt-get install libgeoip1 && sudo apt-get install python-geoip (ubuntu/debian) # # Extract IP:Port from a proxylist site code from low1z lurking at darkc0de.com # this code is protected under the gpl get your copy at # # update from 0.9 - 1.1 notes # - fetch planetlab(codeen) proxylist & clean our list with it # - validate external ip with whatsmyip.com # - GeoIP # # - !! due to urllib1/2 limitations there is no way yet to except username/passwd input !! # # darkc0de Crew # www.darkc0de.com # code low1z # # Greetz to # d3hydr8, rsauron, baltazar, inkubus, kopele # and the rest of the Darkc0de members import sys, os, urllib, urllib2, re, httplib, sets, socket from time import time, localtime, strftime from socket import gethostbyaddr nogeoip = 0 try: import GeoIP except: nogeoip = 1 print "\nGeoIP Module/Database NOT found, try:" print "sudo apt-get install libgeoip1 && sudo apt-get install python-geoip" print "or visit www[.]maxmind[.]com for download" print "GeoIP is not required but highly recommended!\n" output = 'proxylist.txt' sleeptimer = 3 socket.setdefaulttimeout(2) alivelist = [] myipadress = urllib.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read() anon_list = [] trans_list = [] planetlab = [] sites = ['http://www.darkc0de.com/cgi-bin/proxies.py', 'http://www.1proxyfree.com/', 'http://www.atomintersoft.com/products/alive-proxy/socks5-list/', 'http://www.proxylist.net/', 'http://www.proxylists.net/http_highanon.txt'] def StripTags(text): return re.sub(r'<[^>]*?>','', text) def timer(): now = strftime('%H:%M:%S-%d/%b/%Y', localtime()) return now def ipcheck(proxy): try: pxhandle = urllib2.ProxyHandler({"http": proxy}) opener = urllib2.build_opener(pxhandle) urllib2.install_opener(opener) myip = urllib2.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read() xs = re.findall(('\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}'), StripTags(myip)) if xs[0] == myipadress or myipadress == myip: trans_list.append(proxy) print proxy[:-1],"\t- ALIVE -", timer(), "- TRANSPARENT" elif xs == None: pass else: anon_list.append(proxy) print proxy[:-1],"\t- ALIVE -", timer(), "- EXT-iP :",xs[0] except KeyboardInterrupt: print "\n\nCTRL+C - check temporary proxylist file\n\n" sys.exit(0) except: pass def proxyvalidator(proxylist): finalcount = 0 for proxy in proxylist: proxy.replace('\n', '') try: proxies = {'http': "http://"+proxy[:-1]} opener = urllib.FancyURLopener(proxies) try: loopchk = opener.open("http://www.google.com").read() except: pass except(IOError,socket.timeout), detail: pass ipcheck(proxy) alivelist.append(proxy) finalcount += 1 return alivelist def getsamairdotru(): counter = 1 pxycnt = 0 maxpages = 10 urls = [] pfile = file(output, 'a') while counter <= maxpages: if counter < 10: # workaround for page-01 to page-09 opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] url = opener.open('http://www.samair.ru/proxy/proxy-0'+repr(counter)+'.htm').read() else: opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] url = opener.open('http://www.samair.ru/proxy/proxy-'+repr(counter)+'.htm').read() strings = re.findall(('\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}:\d{1,5}'), StripTags(url)) for string in strings: pfile.write(string+"\n") pxycnt = pxycnt+1 counter = counter+1 opener.close() print pxycnt, "\t: Proxies received from : http://www.samair.ru/proxy/" pfile.close() def getsinglesitelist(site): pxycnt = 0 urls = [] pfile = file(output, 'a') opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] url = opener.open(site).read() strings = re.findall(('\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}[:]\d{1,5}'), StripTags(url)) for string in strings: pfile.write(string+"\n") pxycnt = pxycnt+1 print pxycnt, "\t: Proxies recieved from :", site.split("//",3)[1] opener.close() pfile.close() def getplanetlabs(): opener = urllib2.build_opener() url = opener.open('http://fall.cs.princeton.edu/codeen/tabulator.cgi?table=table_all').read() strings = re.findall(('\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}'), StripTags(url)) for string in strings: planetlab.append(string) print len(planetlab), "\t: PlanetLab Proxylist Loaded", "\n" def cleanup(): pfile = open(output, 'r').readlines() outfile = file(output, 'w') sorted = [] finalcount = 0 psremove = 0 for proxy in pfile: if proxy.split(':',1)[0] not in planetlab: if proxy not in sorted: sorted.append(proxy) outfile.write(proxy) finalcount += 1 if proxy.split(':',1)[0] in planetlab: psremove += 1 print "\n", psremove, "\t: PlanetLab (CoDeen) Proxies removed!" print finalcount,"\t: unique Proxies found\n" print "+-[Starting Validation]-----------------------------------------------------+" outfile.close() def fileConst(): fileC = open(output, 'w') falive = [] fileC.write('+ This List has been generated with proxyharvest_1.1.py // www.darkc0de.com\n') fileC.write('+ ANONYMOUS PROXIES\n\n') for anon in anon_list: fileC.write(anon) if anon in alivelist: alivelist.remove(anon) fileC.write('\n\n+ TRANSPARENT PROXIES\n\n') for trans in trans_list: fileC.write(trans) if trans in alivelist: alivelist.remove(trans) fileC.write('\n\n+ WORKING BUT UNCLEAR PROXIES\n\n') alivelist.sort() for alive in alivelist: fileC.write(alive) fileC.close() def helpme(): print "| -s / -sitecollect :: gathers proxylists |" print "| -m / -multipage :: get incremental pages |" print "| -a / -all :: do ALL!!! |" print "| -vl / - validatelist :: check a file |" print "+-----------------------------------------------+" try: os.remove(output) except: pass print "+-----------------------------------------------+" print "| ProxyHarvest.py 1.1 |" print "| low1z 2009 // darkc0de |" print "+-----------------------------------------------+" print "IP:", myipadress, "//", timer(), "\n" getplanetlabs() if len(sys.argv) <= 1: print "\n\t < use -help to get options >\n" sys.exit(1) for arg in sys.argv[1:]: if arg.lower() == "-h" or arg.lower() == "-help": helpme() if arg.lower() == "-s" or arg.lower() == "-sitecollect": for site in sites: try: getsinglesitelist(site) except: print "Error :", site cleanup() proxylist = open(output, 'r').readlines() proxyvalidator(proxylist) if arg.lower() == "-m" or arg.lower() == "-multipage": getsamairdotru() cleanup() print "may take some time to print out good proxies, be patient" try: proxylist = open(output, 'r').readlines() proxyvalidator(proxylist) except: pass if arg.lower() == "-a" or arg.lower() == "-all": try: for site in sites: getsinglesitelist(site) getsamairdotru() cleanup() proxylist = open(output, 'r').readlines() proxyvalidator(proxylist) except: print "something went wront... using -a is seems a bit buggy" if arg.lower() == "-vl" or arg.lower() == "-validatelist": try: proxyfile = open(sys.argv[2], 'r').readlines() proxyvalidator(proxyfile) except(IndexError): print "Error: check you proxy file ...\n" sys.exit(0) print "\n+-[ANON LIST]-------------------------------------------------------------+\n" for anon_proxy in anon_list: try: haddr = gethostbyaddr(anon_proxy.split(':',1)[0]) except: haddr = '-' if nogeoip == 1: print anon_proxy.replace('\n',''),"\t| HostAdress:", haddr[0] pass elif nogeoip == 0: gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE) gx = gi.country_code_by_addr(anon_proxy.split(':',1)[0]) print anon_proxy.replace('\n',''), "\t| Country:", gx,"\t| HostAdress:", haddr[0] print "\n\t", len(anon_list), ": Total tested AnonProxies\n" print "+-[TRANS LIST]--------------------------------------------------------------+\n" for trans_proxy in trans_list: if nogeoip == 1: print trans_proxy.replace('\n','') pass elif nogeoip == 0: gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE) gx = gi.country_code_by_addr(trans_proxy.split(':',1)[0]) print trans_proxy.replace('\n',''), "\t| Country:", gx print "\n\t", len(trans_list), ": Total tested Transparent Proxies\n" print "+-[OTHER SERVERS]-----------------------------------------------------------+\n" if len(alivelist) > 16: print len(alivelist), "Alive but unverified Servers, check", output else: for alive in alivelist: if alive not in trans_list: if alive not in anon_list: print alive.replace('\n','') fileConst()