通过套接字提供代理应用程序 Tornado 4.4
Serve proxy app through sockets Tornado 4.4
我有一个小问题。
我是整个套接字理论和 Tornado 框架的新手。最近我使用 Tornado 框架编写了一个代理服务器。我的应用程序位于连接客户端(浏览器)<===> 远程地址的中间。所以连接看起来像这样:
client(my web browser) <=== PROXY(my app) ===> Remote address (stackoverflow.com)
如果我使用标准的“main”函数,一切正常。但我想更底层一些,我的意思是我想创建套接字并通过该连接提供我的代理应用程序。
我的代理应用 urls:
# coding: utf-8
"""URL's for proxy app."""
from settings import settings
from tornado.web import (
StaticFileHandler,
url,
)
from handlers import (
mainHandlers,
myProxy,
)
# Routing table for the proxy application. Tornado uses the first pattern
# that matches, so the admin and static routes must stay ahead of the
# catch-all proxy route at the end.
urls = [
    url(r"/admin/$", mainHandlers.MainHandler),
    url(r"/admin/delete_filter/", mainHandlers.DataDeleteHandler),
    url(r"/admin/filters/$", mainHandlers.DataGetter),
    url(r"/admin/new_filter/$", mainHandlers.FormHandler),
    url(r"/admin/stats/$", mainHandlers.StatsTableHandler),
    url(r"/admin/stats/query/$", mainHandlers.AjaxStatsGetHandler),
    # StaticFileHandler.get() receives the file path as an argument, so the
    # pattern needs a capturing group for everything after the prefix.
    url(r"/static/(.*)", StaticFileHandler, dict(path=settings['static_path'])),
    # Every other request is handed to the proxy handler.
    url(r'.*', myProxy.ProxyHandler),
]
我的代理处理程序:
class ProxyHandler(tornado.web.RequestHandler):
    """Forward GET/POST requests to the requested URI and filter the reply.

    Depends on names defined outside this snippet: ``c`` (presumably a
    DB-API cursor), ``secure_pages``, ``fetch_request``, ``logger`` and
    ``BeautifulSoup``.
    """

    SUPPORTED_METHODS = ['GET', 'POST']

    def data_received(self, chunk):
        """Do nothing with streamed request data."""
        pass

    def compute_etag(self):
        """Return None so that Tornado does not add an ETag."""
        return None

    def handle_response(self, response):
        """Relay ``response`` to the client, stripping filtered tags first.

        Bodies of pages whose URI contains an entry of ``secure_pages`` are
        passed through untouched; any other non-empty body is parsed and
        every tag named by a ``filter_type=1`` row is removed from it.
        """
        if response.error and not isinstance(response.error, tornado.httpclient.HTTPError):
            # Not an HTTP-level error: report it instead of relaying.
            self.set_status(500)
            self.write('Internal server error:\n' + str(response.error))
            self.finish()
            return

        self.set_status(response.code, response.reason)
        # Start from empty headers so Tornado's defaults are not sent.
        self._headers = tornado.httputil.HTTPHeaders()
        skipped = ('Content-Length', 'Transfer-Encoding', 'Content-Encoding', 'Connection')
        for name, value in response.headers.get_all():
            if name in skipped:
                continue
            # add_header rather than set_header: some headers appear
            # several times, e.g. 'Set-Cookie'.
            self.add_header(name, value)

        secured_page = any(page in self.request.uri for page in secure_pages)
        if secured_page:
            self.set_header('Content-Length', len(response.body))
            self.write(response.body)
        elif response.body:
            c.execute('SELECT filter_name FROM filters WHERE filter_type=1')
            rows = c.fetchall()
            soup = BeautifulSoup(response.body, 'html.parser')
            for row in rows:
                tag_name = str(row[0])
                matches = soup.find_all(tag_name)
                if matches:
                    print('catched: %s of <%s> tags' % (len(matches), tag_name))
                for match in matches:
                    match.extract()
            filtered_body = str(soup)
            self.set_header('Content-Length', len(filtered_body))
            self.write(filtered_body)
        self.finish()

    @tornado.web.asynchronous
    def get(self):
        """Proxy the request unless its path matches a ``filter_type=2`` row.

        A matching path is answered with 403. Otherwise the request is passed
        to ``fetch_request`` with ``handle_response`` as the callback.
        """
        logger.debug('Handle %s request to %s', self.request.method, self.request.uri)
        body = self.request.body or None
        try:
            if 'Proxy-Connection' in self.request.headers:
                del self.request.headers['Proxy-Connection']
            c.execute('SELECT filter_name FROM filters WHERE filter_type=2')
            blocked_rows = c.fetchall()
            if any(row[0] in self.request.path for row in blocked_rows):
                self.set_status(403)
                self.finish()
                return
            fetch_request(
                self.request.uri,
                self.handle_response,
                method=self.request.method,
                body=body,
                headers=self.request.headers,
                follow_redirects=False,
                allow_nonstandard_methods=True,
            )
        except tornado.httpclient.HTTPError as e:
            if hasattr(e, 'response') and e.response:
                # The error carries a response: relay it like any other.
                self.handle_response(e.response)
                return
            self.set_status(500)
            self.write('Internal server error:\n' + str(e))
            self.finish()

    @tornado.web.asynchronous
    def post(self):
        """Handle POST exactly like GET."""
        return self.get()
简单的主函数:
# coding: utf-8
import sys
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
from proxy.urls import proxy_urls
def make_app():
    """Build and return the application object for the proxy URL table."""
    application = MyApplication(proxy_urls)
    return application
if __name__ == "__main__":
    # Main loop: listen on the configured port, or on the port given as the
    # first command-line argument, then run the IOLoop.
    app = make_app()
    port = options.port
    if sys.argv[1:]:
        port = int(sys.argv[1])
    app.listen(port)
    print('tornado working on port %s' % port)
    tornado.ioloop.IOLoop.current().start()
所以我想根据文档把简单的方法改成低级的方法:
import errno
import functools
import tornado.ioloop
import socket
def connection_ready(sock, fd, events):
    """Accept every pending connection on ``sock`` and dispatch it.

    Registered as an IOLoop read handler; ``fd`` and ``events`` are unused.
    Each accepted connection is made non-blocking and handed to
    ``handle_connection`` (not defined in this snippet).
    """
    would_block = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] in would_block:
                # Nothing left to accept on the non-blocking socket.
                return
            raise
        connection.setblocking(0)
        handle_connection(connection, address)
if __name__ == '__main__':
    # Non-blocking listening socket on all interfaces. `port` is not
    # defined in this snippet and must be supplied by the surrounding code.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(("", port))
    sock.listen(128)

    # Run connection_ready(sock, fd, events) whenever the socket is readable.
    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    io_loop.start()
为了做到这一点,我阅读了一些有关使用套接字进行整个网络编程的内容 (https://www.tutorialspoint.com/python/python_networking.htm)。
教程中的示例运行良好,因此我尝试将教程与 Tornado 文档中的示例联系起来:
# coding: utf-8
import errno
import functools
import socket
import sys
import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
def make_app():
    u"""Build and return a ``MyApplication`` created with no arguments."""
    application = MyApplication()
    return application
def connection_ready(sock, fd, events):
    u"""Accept pending connections on ``sock`` and try to send them the server.

    Registered as an IOLoop read handler; ``fd`` and ``events`` are unused.
    """
    # A fresh application and HTTPServer are built on every invocation.
    server = tornado.httpserver.HTTPServer(make_app())
    would_block = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] in would_block:
                # Nothing left to accept on the non-blocking socket.
                return
            raise
        print('Got connection from %s' % (address,))
        # connection.setblocking(False)
        # NOTE: send() only accepts a buffer, so passing the HTTPServer
        # object raises TypeError here and close() is never reached. This is
        # the failure the question asks about; the server has to be attached
        # to the listening socket rather than sent over the connection.
        connection.send(server)
        connection.close()
if __name__ == "__main__":
    # Main loop: use the configured port unless one is given as the first
    # command-line argument.
    port = options.port
    if sys.argv[1:]:
        port = int(sys.argv[1])

    # Non-blocking listening socket on all interfaces.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(('', port))
    sock.listen(5)

    # Run connection_ready(sock, fd, events) whenever the socket is readable.
    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    print('Tornado Proxy working on port: %s' % port)
    io_loop.start()
但是当我尝试连接到我的代理时(例如添加过滤器:http://127.0.0.1:8000/admin/filters/ - 我编写了一个处理程序来处理这个 url)
我收到特定错误:
ERROR:tornado.application:Exception in callback (3, )
Traceback (most recent call last):
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "manage.py", line 35, in connection_ready
connection.send(server)
TypeError: send() argument 1 must be convertible to a buffer, not HTTPServer
我明白我不能通过连接发送 HTTPServer(从一端到另一端),它必须是一个缓冲区。
我的第一个想法是从处理 URL 的处理程序发送缓冲区(例如 class ProxyHandler(tornado.web.RequestHandler)),
但我如何处理不同的 url 和不同的处理程序?
尝试了几种方法之后,我把原本作为 Tornado 应用程序编写的代理程序改写成了纯 Python 代码,由它处理来自远程地址的响应并执行过滤操作。我认为这是我能采用的最好、最快的做法。
我有一个小问题。 我是整个套接字理论和 Tornado 框架的新手。最近我使用 Tornado 框架编写了一个代理服务器。我的应用程序位于连接客户端(浏览器)<===> 远程地址的中间。所以连接看起来像这样:
client(my web browser) <=== PROXY(my app) ===> Remote address (stackoverflow.com)
如果我使用标准的“main”函数,一切正常。但我想更底层一些,我的意思是我想创建套接字并通过该连接提供我的代理应用程序。
我的代理应用 urls:
# coding: utf-8
"""URL's for proxy app."""
from settings import settings
from tornado.web import (
StaticFileHandler,
url,
)
from handlers import (
mainHandlers,
myProxy,
)
# Routing table for the proxy application. Tornado uses the first pattern
# that matches, so the admin and static routes must stay ahead of the
# catch-all proxy route at the end.
urls = [
    url(r"/admin/$", mainHandlers.MainHandler),
    url(r"/admin/delete_filter/", mainHandlers.DataDeleteHandler),
    url(r"/admin/filters/$", mainHandlers.DataGetter),
    url(r"/admin/new_filter/$", mainHandlers.FormHandler),
    url(r"/admin/stats/$", mainHandlers.StatsTableHandler),
    url(r"/admin/stats/query/$", mainHandlers.AjaxStatsGetHandler),
    # StaticFileHandler.get() receives the file path as an argument, so the
    # pattern needs a capturing group for everything after the prefix.
    url(r"/static/(.*)", StaticFileHandler, dict(path=settings['static_path'])),
    # Every other request is handed to the proxy handler.
    url(r'.*', myProxy.ProxyHandler),
]
我的代理处理程序:
class ProxyHandler(tornado.web.RequestHandler):
    """Forward GET/POST requests to the requested URI and filter the reply.

    Depends on names defined outside this snippet: ``c`` (presumably a
    DB-API cursor), ``secure_pages``, ``fetch_request``, ``logger`` and
    ``BeautifulSoup``.
    """

    SUPPORTED_METHODS = ['GET', 'POST']

    def data_received(self, chunk):
        """Do nothing with streamed request data."""
        pass

    def compute_etag(self):
        """Return None so that Tornado does not add an ETag."""
        return None

    def handle_response(self, response):
        """Relay ``response`` to the client, stripping filtered tags first.

        Bodies of pages whose URI contains an entry of ``secure_pages`` are
        passed through untouched; any other non-empty body is parsed and
        every tag named by a ``filter_type=1`` row is removed from it.
        """
        if response.error and not isinstance(response.error, tornado.httpclient.HTTPError):
            # Not an HTTP-level error: report it instead of relaying.
            self.set_status(500)
            self.write('Internal server error:\n' + str(response.error))
            self.finish()
            return

        self.set_status(response.code, response.reason)
        # Start from empty headers so Tornado's defaults are not sent.
        self._headers = tornado.httputil.HTTPHeaders()
        skipped = ('Content-Length', 'Transfer-Encoding', 'Content-Encoding', 'Connection')
        for name, value in response.headers.get_all():
            if name in skipped:
                continue
            # add_header rather than set_header: some headers appear
            # several times, e.g. 'Set-Cookie'.
            self.add_header(name, value)

        secured_page = any(page in self.request.uri for page in secure_pages)
        if secured_page:
            self.set_header('Content-Length', len(response.body))
            self.write(response.body)
        elif response.body:
            c.execute('SELECT filter_name FROM filters WHERE filter_type=1')
            rows = c.fetchall()
            soup = BeautifulSoup(response.body, 'html.parser')
            for row in rows:
                tag_name = str(row[0])
                matches = soup.find_all(tag_name)
                if matches:
                    print('catched: %s of <%s> tags' % (len(matches), tag_name))
                for match in matches:
                    match.extract()
            filtered_body = str(soup)
            self.set_header('Content-Length', len(filtered_body))
            self.write(filtered_body)
        self.finish()

    @tornado.web.asynchronous
    def get(self):
        """Proxy the request unless its path matches a ``filter_type=2`` row.

        A matching path is answered with 403. Otherwise the request is passed
        to ``fetch_request`` with ``handle_response`` as the callback.
        """
        logger.debug('Handle %s request to %s', self.request.method, self.request.uri)
        body = self.request.body or None
        try:
            if 'Proxy-Connection' in self.request.headers:
                del self.request.headers['Proxy-Connection']
            c.execute('SELECT filter_name FROM filters WHERE filter_type=2')
            blocked_rows = c.fetchall()
            if any(row[0] in self.request.path for row in blocked_rows):
                self.set_status(403)
                self.finish()
                return
            fetch_request(
                self.request.uri,
                self.handle_response,
                method=self.request.method,
                body=body,
                headers=self.request.headers,
                follow_redirects=False,
                allow_nonstandard_methods=True,
            )
        except tornado.httpclient.HTTPError as e:
            if hasattr(e, 'response') and e.response:
                # The error carries a response: relay it like any other.
                self.handle_response(e.response)
                return
            self.set_status(500)
            self.write('Internal server error:\n' + str(e))
            self.finish()

    @tornado.web.asynchronous
    def post(self):
        """Handle POST exactly like GET."""
        return self.get()
简单的主函数:
# coding: utf-8
import sys
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
from proxy.urls import proxy_urls
def make_app():
    """Build and return the application object for the proxy URL table."""
    application = MyApplication(proxy_urls)
    return application
if __name__ == "__main__":
    # Main loop: listen on the configured port, or on the port given as the
    # first command-line argument, then run the IOLoop.
    app = make_app()
    port = options.port
    if sys.argv[1:]:
        port = int(sys.argv[1])
    app.listen(port)
    print('tornado working on port %s' % port)
    tornado.ioloop.IOLoop.current().start()
所以我想根据文档把简单的方法改成低级的方法:
import errno
import functools
import tornado.ioloop
import socket
def connection_ready(sock, fd, events):
    """Accept every pending connection on ``sock`` and dispatch it.

    Registered as an IOLoop read handler; ``fd`` and ``events`` are unused.
    Each accepted connection is made non-blocking and handed to
    ``handle_connection`` (not defined in this snippet).
    """
    would_block = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] in would_block:
                # Nothing left to accept on the non-blocking socket.
                return
            raise
        connection.setblocking(0)
        handle_connection(connection, address)
if __name__ == '__main__':
    # Non-blocking listening socket on all interfaces. `port` is not
    # defined in this snippet and must be supplied by the surrounding code.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(("", port))
    sock.listen(128)

    # Run connection_ready(sock, fd, events) whenever the socket is readable.
    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    io_loop.start()
为了做到这一点,我阅读了一些有关使用套接字进行整个网络编程的内容 (https://www.tutorialspoint.com/python/python_networking.htm)。 教程中的示例运行良好,因此我尝试将教程与 Tornado 文档中的示例联系起来:
# coding: utf-8
import errno
import functools
import socket
import sys
import tornado.httpserver
import tornado.ioloop
import tornado.netutil
import tornado.process
import tornado.web
from tornado.options import options
from configuration.application import MyApplication
def make_app():
    u"""Build and return a ``MyApplication`` created with no arguments."""
    application = MyApplication()
    return application
def connection_ready(sock, fd, events):
    u"""Accept pending connections on ``sock`` and try to send them the server.

    Registered as an IOLoop read handler; ``fd`` and ``events`` are unused.
    """
    # A fresh application and HTTPServer are built on every invocation.
    server = tornado.httpserver.HTTPServer(make_app())
    would_block = (errno.EWOULDBLOCK, errno.EAGAIN)
    while True:
        try:
            connection, address = sock.accept()
        except socket.error as e:
            if e.args[0] in would_block:
                # Nothing left to accept on the non-blocking socket.
                return
            raise
        print('Got connection from %s' % (address,))
        # connection.setblocking(False)
        # NOTE: send() only accepts a buffer, so passing the HTTPServer
        # object raises TypeError here and close() is never reached. This is
        # the failure the question asks about; the server has to be attached
        # to the listening socket rather than sent over the connection.
        connection.send(server)
        connection.close()
if __name__ == "__main__":
    # Main loop: use the configured port unless one is given as the first
    # command-line argument.
    port = options.port
    if sys.argv[1:]:
        port = int(sys.argv[1])

    # Non-blocking listening socket on all interfaces.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setblocking(False)
    sock.bind(('', port))
    sock.listen(5)

    # Run connection_ready(sock, fd, events) whenever the socket is readable.
    io_loop = tornado.ioloop.IOLoop.current()
    callback = functools.partial(connection_ready, sock)
    io_loop.add_handler(sock.fileno(), callback, io_loop.READ)
    print('Tornado Proxy working on port: %s' % port)
    io_loop.start()
但是当我尝试连接到我的代理时(例如添加过滤器:http://127.0.0.1:8000/admin/filters/ - 我编写了一个处理程序来处理这个 url) 我收到特定错误:
ERROR:tornado.application:Exception in callback (3, )
Traceback (most recent call last):
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/dave/.virtualenvs/teleV1/local/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "manage.py", line 35, in connection_ready
connection.send(server)
TypeError: send() argument 1 must be convertible to a buffer, not HTTPServer
我明白我不能通过连接发送 HTTPServer(从一端到另一端),它必须是一个缓冲区。 我的第一个想法是从处理 URL 的处理程序发送缓冲区(例如 class ProxyHandler(tornado.web.RequestHandler)), 但我如何处理不同的 url 和不同的处理程序?
尝试了几种方法之后,我把原本作为 Tornado 应用程序编写的代理程序改写成了纯 Python 代码,由它处理来自远程地址的响应并执行过滤操作。我认为这是我能采用的最好、最快的做法。