You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1267 lines
46KB

  1. # This is part of Python source code with Eventlet-specific modifications.
  2. #
  3. # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
  4. # 2011, 2012, 2013, 2014, 2015, 2016 Python Software Foundation; All Rights
  5. # Reserved
  6. #
  7. # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
  8. # --------------------------------------------
  9. #
  10. # 1. This LICENSE AGREEMENT is between the Python Software Foundation
  11. # ("PSF"), and the Individual or Organization ("Licensee") accessing and
  12. # otherwise using this software ("Python") in source or binary form and
  13. # its associated documentation.
  14. #
  15. # 2. Subject to the terms and conditions of this License Agreement, PSF hereby
  16. # grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
  17. # analyze, test, perform and/or display publicly, prepare derivative works,
  18. # distribute, and otherwise use Python alone or in any derivative version,
  19. # provided, however, that PSF's License Agreement and PSF's notice of copyright,
  20. # i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
  21. # 2011, 2012, 2013, 2014, 2015, 2016 Python Software Foundation; All Rights
  22. # Reserved" are retained in Python alone or in any derivative version prepared by
  23. # Licensee.
  24. #
  25. # 3. In the event Licensee prepares a derivative work that is based on
  26. # or incorporates Python or any part thereof, and wants to make
  27. # the derivative work available to others as provided herein, then
  28. # Licensee hereby agrees to include in any such work a brief summary of
  29. # the changes made to Python.
  30. #
  31. # 4. PSF is making Python available to Licensee on an "AS IS"
  32. # basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
  33. # IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
  34. # DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
  35. # FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
  36. # INFRINGE ANY THIRD PARTY RIGHTS.
  37. #
  38. # 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
  39. # FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
  40. # A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
  41. # OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
  42. #
  43. # 6. This License Agreement will automatically terminate upon a material
  44. # breach of its terms and conditions.
  45. #
  46. # 7. Nothing in this License Agreement shall be deemed to create any
  47. # relationship of agency, partnership, or joint venture between PSF and
  48. # Licensee. This License Agreement does not grant permission to use PSF
  49. # trademarks or trade name in a trademark sense to endorse or promote
  50. # products or services of Licensee, or any third party.
  51. #
  52. # 8. By copying, installing or otherwise using Python, Licensee
  53. # agrees to be bound by the terms and conditions of this License
  54. # Agreement.
  55. """HTTP server classes.
  56. Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
  57. SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
  58. and CGIHTTPRequestHandler for CGI scripts.
  59. It does, however, optionally implement HTTP/1.1 persistent connections,
  60. as of version 0.3.
  61. Notes on CGIHTTPRequestHandler
  62. ------------------------------
  63. This class implements GET and POST requests to cgi-bin scripts.
  64. If the os.fork() function is not present (e.g. on Windows),
  65. subprocess.Popen() is used as a fallback, with slightly altered semantics.
  66. In all cases, the implementation is intentionally naive -- all
  67. requests are executed synchronously.
  68. SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
  69. -- it may execute arbitrary Python code or external programs.
  70. Note that status code 200 is sent prior to execution of a CGI script, so
  71. scripts cannot send other status codes such as 302 (redirect).
  72. XXX To do:
  73. - log requests even later (to capture byte count)
  74. - log user-agent header and other interesting goodies
  75. - send error log to separate file
  76. """
  77. # See also:
  78. #
  79. # HTTP Working Group T. Berners-Lee
  80. # INTERNET-DRAFT R. T. Fielding
  81. # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
  82. # Expires September 8, 1995 March 8, 1995
  83. #
  84. # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
  85. #
  86. # and
  87. #
  88. # Network Working Group R. Fielding
  89. # Request for Comments: 2616 et al
  90. # Obsoletes: 2068 June 1999
  91. # Category: Standards Track
  92. #
  93. # URL: http://www.faqs.org/rfcs/rfc2616.html
  94. # Log files
  95. # ---------
  96. #
  97. # Here's a quote from the NCSA httpd docs about log file format.
  98. #
  99. # | The logfile format is as follows. Each line consists of:
  100. # |
  101. # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
  102. # |
  103. # | host: Either the DNS name or the IP number of the remote client
  104. # | rfc931: Any information returned by identd for this person,
  105. # | - otherwise.
  106. # | authuser: If user sent a userid for authentication, the user name,
  107. # | - otherwise.
  108. # | DD: Day
  109. # | Mon: Month (calendar name)
  110. # | YYYY: Year
  111. # | hh: hour (24-hour format, the machine's timezone)
  112. # | mm: minutes
  113. # | ss: seconds
  114. # | request: The first line of the HTTP request as sent by the client.
  115. # | ddd: the status code returned by the server, - if not available.
  116. # | bbbb: the total number of bytes sent,
  117. # | *not including the HTTP/1.0 header*, - if not available
  118. # |
  119. # | You can determine the name of the file accessed through request.
  120. #
  121. # (Actually, the latter is only true if you know the server configuration
  122. # at the time the request was made!)
# Version of this HTTP server implementation.  The request handler classes
# embed it in their ``server_version`` strings.
__version__ = "0.6"

# Names exported by ``from <module> import *``.
__all__ = [
    "HTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
  128. import email.utils
  129. import html
  130. import io
  131. import mimetypes
  132. import posixpath
  133. import shutil
  134. import sys
  135. import urllib.parse
  136. import copy
  137. import argparse
  138. from eventlet.green import (
  139. os,
  140. time,
  141. select,
  142. socket,
  143. SocketServer as socketserver,
  144. subprocess,
  145. )
  146. from eventlet.green.http import client as http_client, HTTPStatus
# Default error message template.  It is %-formatted with a mapping that
# supplies the keys 'code', 'message' and 'explain' (this is what
# BaseHTTPRequestHandler.send_error does with error_message_format).
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code: %(code)d</p>
<p>Message: %(message)s.</p>
<p>Error code explanation: %(code)s - %(explain)s.</p>
</body>
</html>
"""

# Value of the Content-Type header sent along with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
  165. class HTTPServer(socketserver.TCPServer):
  166. allow_reuse_address = 1 # Seems to make sense in testing environment
  167. def server_bind(self):
  168. """Override server_bind to store the server name."""
  169. socketserver.TCPServer.server_bind(self)
  170. host, port = self.server_address[:2]
  171. self.server_name = socket.getfqdn(host)
  172. self.server_port = port
  173. class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
  174. """HTTP request handler base class.
  175. The following explanation of HTTP serves to guide you through the
  176. code as well as to expose any misunderstandings I may have about
  177. HTTP (so you don't need to read the code to figure out I'm wrong
  178. :-).
  179. HTTP (HyperText Transfer Protocol) is an extensible protocol on
  180. top of a reliable stream transport (e.g. TCP/IP). The protocol
  181. recognizes three parts to a request:
  182. 1. One line identifying the request type and path
  183. 2. An optional set of RFC-822-style headers
  184. 3. An optional data part
  185. The headers and data are separated by a blank line.
  186. The first line of the request has the form
  187. <command> <path> <version>
  188. where <command> is a (case-sensitive) keyword such as GET or POST,
  189. <path> is a string containing path information for the request,
  190. and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
  191. <path> is encoded using the URL encoding scheme (using %xx to signify
  192. the ASCII character with hex code xx).
  193. The specification specifies that lines are separated by CRLF but
  194. for compatibility with the widest range of clients recommends
  195. servers also handle LF. Similarly, whitespace in the request line
  196. is treated sensibly (allowing multiple spaces between components
  197. and allowing trailing whitespace).
  198. Similarly, for output, lines ought to be separated by CRLF pairs
  199. but most clients grok LF characters just fine.
  200. If the first line of the request has the form
  201. <command> <path>
  202. (i.e. <version> is left out) then this is assumed to be an HTTP
  203. 0.9 request; this form has no optional headers and data part and
  204. the reply consists of just the data.
  205. The reply form of the HTTP 1.x protocol again has three parts:
  206. 1. One line giving the response code
  207. 2. An optional set of RFC-822-style headers
  208. 3. The data
  209. Again, the headers and data are separated by a blank line.
  210. The response code line has the form
  211. <version> <responsecode> <responsestring>
  212. where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
  213. <responsecode> is a 3-digit response code indicating success or
  214. failure of the request, and <responsestring> is an optional
  215. human-readable string explaining what the response code means.
  216. This server parses the request and the headers, and then calls a
  217. function specific to the request type (<command>). Specifically,
  218. a request SPAM will be handled by a method do_SPAM(). If no
  219. such method exists the server sends an error response to the
  220. client. If it exists, it is called with no arguments:
  221. do_SPAM()
  222. Note that the request name is case sensitive (i.e. SPAM and spam
  223. are different requests).
  224. The various request details are stored in instance variables:
  225. - client_address is the client IP address in the form (host,
  226. port);
  227. - command, path and version are the broken-down request line;
  228. - headers is an instance of email.message.Message (or a derived
  229. class) containing the header information;
  230. - rfile is a file object open for reading positioned at the
  231. start of the optional input data part;
  232. - wfile is a file object open for writing.
  233. IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
  234. The first thing to be written must be the response line. Then
  235. follow 0 or more header lines, then a blank line, and then the
  236. actual data (if any). The meaning of the header lines depends on
  237. the command executed by the server; in most cases, when data is
  238. returned, there should be at least one header line of the form
  239. Content-type: <type>/<subtype>
  240. where <type> and <subtype> should be registered MIME types,
  241. e.g. "text/html" or "text/plain".
  242. """
  243. # The Python system version, truncated to its first component.
  244. sys_version = "Python/" + sys.version.split()[0]
  245. # The server software version. You may want to override this.
  246. # The format is multiple whitespace-separated strings,
  247. # where each string is of the form name[/version].
  248. server_version = "BaseHTTP/" + __version__
  249. error_message_format = DEFAULT_ERROR_MESSAGE
  250. error_content_type = DEFAULT_ERROR_CONTENT_TYPE
  251. # The default request version. This only affects responses up until
  252. # the point where the request line is parsed, so it mainly decides what
  253. # the client gets back when sending a malformed request line.
  254. # Most web servers default to HTTP 0.9, i.e. don't send a status line.
  255. default_request_version = "HTTP/0.9"
  256. def parse_request(self):
  257. """Parse a request (internal).
  258. The request should be stored in self.raw_requestline; the results
  259. are in self.command, self.path, self.request_version and
  260. self.headers.
  261. Return True for success, False for failure; on failure, an
  262. error is sent back.
  263. """
  264. self.command = None # set in case of error on the first line
  265. self.request_version = version = self.default_request_version
  266. self.close_connection = True
  267. requestline = str(self.raw_requestline, 'iso-8859-1')
  268. requestline = requestline.rstrip('\r\n')
  269. self.requestline = requestline
  270. words = requestline.split()
  271. if len(words) == 3:
  272. command, path, version = words
  273. try:
  274. if version[:5] != 'HTTP/':
  275. raise ValueError
  276. base_version_number = version.split('/', 1)[1]
  277. version_number = base_version_number.split(".")
  278. # RFC 2145 section 3.1 says there can be only one "." and
  279. # - major and minor numbers MUST be treated as
  280. # separate integers;
  281. # - HTTP/2.4 is a lower version than HTTP/2.13, which in
  282. # turn is lower than HTTP/12.3;
  283. # - Leading zeros MUST be ignored by recipients.
  284. if len(version_number) != 2:
  285. raise ValueError
  286. version_number = int(version_number[0]), int(version_number[1])
  287. except (ValueError, IndexError):
  288. self.send_error(
  289. HTTPStatus.BAD_REQUEST,
  290. "Bad request version (%r)" % version)
  291. return False
  292. if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
  293. self.close_connection = False
  294. if version_number >= (2, 0):
  295. self.send_error(
  296. HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
  297. "Invalid HTTP version (%s)" % base_version_number)
  298. return False
  299. elif len(words) == 2:
  300. command, path = words
  301. self.close_connection = True
  302. if command != 'GET':
  303. self.send_error(
  304. HTTPStatus.BAD_REQUEST,
  305. "Bad HTTP/0.9 request type (%r)" % command)
  306. return False
  307. elif not words:
  308. return False
  309. else:
  310. self.send_error(
  311. HTTPStatus.BAD_REQUEST,
  312. "Bad request syntax (%r)" % requestline)
  313. return False
  314. self.command, self.path, self.request_version = command, path, version
  315. # Examine the headers and look for a Connection directive.
  316. try:
  317. self.headers = http_client.parse_headers(self.rfile,
  318. _class=self.MessageClass)
  319. except http_client.LineTooLong as err:
  320. self.send_error(
  321. HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
  322. "Line too long",
  323. str(err))
  324. return False
  325. except http_client.HTTPException as err:
  326. self.send_error(
  327. HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
  328. "Too many headers",
  329. str(err)
  330. )
  331. return False
  332. conntype = self.headers.get('Connection', "")
  333. if conntype.lower() == 'close':
  334. self.close_connection = True
  335. elif (conntype.lower() == 'keep-alive' and
  336. self.protocol_version >= "HTTP/1.1"):
  337. self.close_connection = False
  338. # Examine the headers and look for an Expect directive
  339. expect = self.headers.get('Expect', "")
  340. if (expect.lower() == "100-continue" and
  341. self.protocol_version >= "HTTP/1.1" and
  342. self.request_version >= "HTTP/1.1"):
  343. if not self.handle_expect_100():
  344. return False
  345. return True
  346. def handle_expect_100(self):
  347. """Decide what to do with an "Expect: 100-continue" header.
  348. If the client is expecting a 100 Continue response, we must
  349. respond with either a 100 Continue or a final response before
  350. waiting for the request body. The default is to always respond
  351. with a 100 Continue. You can behave differently (for example,
  352. reject unauthorized requests) by overriding this method.
  353. This method should either return True (possibly after sending
  354. a 100 Continue response) or send an error response and return
  355. False.
  356. """
  357. self.send_response_only(HTTPStatus.CONTINUE)
  358. self.end_headers()
  359. return True
  360. def handle_one_request(self):
  361. """Handle a single HTTP request.
  362. You normally don't need to override this method; see the class
  363. __doc__ string for information on how to handle specific HTTP
  364. commands such as GET and POST.
  365. """
  366. try:
  367. self.raw_requestline = self.rfile.readline(65537)
  368. if len(self.raw_requestline) > 65536:
  369. self.requestline = ''
  370. self.request_version = ''
  371. self.command = ''
  372. self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
  373. return
  374. if not self.raw_requestline:
  375. self.close_connection = True
  376. return
  377. if not self.parse_request():
  378. # An error code has been sent, just exit
  379. return
  380. mname = 'do_' + self.command
  381. if not hasattr(self, mname):
  382. self.send_error(
  383. HTTPStatus.NOT_IMPLEMENTED,
  384. "Unsupported method (%r)" % self.command)
  385. return
  386. method = getattr(self, mname)
  387. method()
  388. self.wfile.flush() #actually send the response if not already done.
  389. except socket.timeout as e:
  390. #a read or a write timed out. Discard this connection
  391. self.log_error("Request timed out: %r", e)
  392. self.close_connection = True
  393. return
  394. def handle(self):
  395. """Handle multiple requests if necessary."""
  396. self.close_connection = True
  397. self.handle_one_request()
  398. while not self.close_connection:
  399. self.handle_one_request()
  400. def send_error(self, code, message=None, explain=None):
  401. """Send and log an error reply.
  402. Arguments are
  403. * code: an HTTP error code
  404. 3 digits
  405. * message: a simple optional 1 line reason phrase.
  406. *( HTAB / SP / VCHAR / %x80-FF )
  407. defaults to short entry matching the response code
  408. * explain: a detailed message defaults to the long entry
  409. matching the response code.
  410. This sends an error response (so it must be called before any
  411. output has been generated), logs the error, and finally sends
  412. a piece of HTML explaining the error to the user.
  413. """
  414. try:
  415. shortmsg, longmsg = self.responses[code]
  416. except KeyError:
  417. shortmsg, longmsg = '???', '???'
  418. if message is None:
  419. message = shortmsg
  420. if explain is None:
  421. explain = longmsg
  422. self.log_error("code %d, message %s", code, message)
  423. self.send_response(code, message)
  424. self.send_header('Connection', 'close')
  425. # Message body is omitted for cases described in:
  426. # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
  427. # - RFC7231: 6.3.6. 205(Reset Content)
  428. body = None
  429. if (code >= 200 and
  430. code not in (HTTPStatus.NO_CONTENT,
  431. HTTPStatus.RESET_CONTENT,
  432. HTTPStatus.NOT_MODIFIED)):
  433. # HTML encode to prevent Cross Site Scripting attacks
  434. # (see bug #1100201)
  435. content = (self.error_message_format % {
  436. 'code': code,
  437. 'message': html.escape(message, quote=False),
  438. 'explain': html.escape(explain, quote=False)
  439. })
  440. body = content.encode('UTF-8', 'replace')
  441. self.send_header("Content-Type", self.error_content_type)
  442. self.send_header('Content-Length', int(len(body)))
  443. self.end_headers()
  444. if self.command != 'HEAD' and body:
  445. self.wfile.write(body)
  446. def send_response(self, code, message=None):
  447. """Add the response header to the headers buffer and log the
  448. response code.
  449. Also send two standard headers with the server software
  450. version and the current date.
  451. """
  452. self.log_request(code)
  453. self.send_response_only(code, message)
  454. self.send_header('Server', self.version_string())
  455. self.send_header('Date', self.date_time_string())
  456. def send_response_only(self, code, message=None):
  457. """Send the response header only."""
  458. if self.request_version != 'HTTP/0.9':
  459. if message is None:
  460. if code in self.responses:
  461. message = self.responses[code][0]
  462. else:
  463. message = ''
  464. if not hasattr(self, '_headers_buffer'):
  465. self._headers_buffer = []
  466. self._headers_buffer.append(("%s %d %s\r\n" %
  467. (self.protocol_version, code, message)).encode(
  468. 'latin-1', 'strict'))
  469. def send_header(self, keyword, value):
  470. """Send a MIME header to the headers buffer."""
  471. if self.request_version != 'HTTP/0.9':
  472. if not hasattr(self, '_headers_buffer'):
  473. self._headers_buffer = []
  474. self._headers_buffer.append(
  475. ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
  476. if keyword.lower() == 'connection':
  477. if value.lower() == 'close':
  478. self.close_connection = True
  479. elif value.lower() == 'keep-alive':
  480. self.close_connection = False
  481. def end_headers(self):
  482. """Send the blank line ending the MIME headers."""
  483. if self.request_version != 'HTTP/0.9':
  484. self._headers_buffer.append(b"\r\n")
  485. self.flush_headers()
  486. def flush_headers(self):
  487. if hasattr(self, '_headers_buffer'):
  488. self.wfile.write(b"".join(self._headers_buffer))
  489. self._headers_buffer = []
  490. def log_request(self, code='-', size='-'):
  491. """Log an accepted request.
  492. This is called by send_response().
  493. """
  494. if isinstance(code, HTTPStatus):
  495. code = code.value
  496. self.log_message('"%s" %s %s',
  497. self.requestline, str(code), str(size))
  498. def log_error(self, format, *args):
  499. """Log an error.
  500. This is called when a request cannot be fulfilled. By
  501. default it passes the message on to log_message().
  502. Arguments are the same as for log_message().
  503. XXX This should go to the separate error log.
  504. """
  505. self.log_message(format, *args)
  506. def log_message(self, format, *args):
  507. """Log an arbitrary message.
  508. This is used by all other logging functions. Override
  509. it if you have specific logging wishes.
  510. The first argument, FORMAT, is a format string for the
  511. message to be logged. If the format string contains
  512. any % escapes requiring parameters, they should be
  513. specified as subsequent arguments (it's just like
  514. printf!).
  515. The client ip and current date/time are prefixed to
  516. every message.
  517. """
  518. sys.stderr.write("%s - - [%s] %s\n" %
  519. (self.address_string(),
  520. self.log_date_time_string(),
  521. format%args))
  522. def version_string(self):
  523. """Return the server software version string."""
  524. return self.server_version + ' ' + self.sys_version
  525. def date_time_string(self, timestamp=None):
  526. """Return the current date and time formatted for a message header."""
  527. if timestamp is None:
  528. timestamp = time.time()
  529. return email.utils.formatdate(timestamp, usegmt=True)
  530. def log_date_time_string(self):
  531. """Return the current time formatted for logging."""
  532. now = time.time()
  533. year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
  534. s = "%02d/%3s/%04d %02d:%02d:%02d" % (
  535. day, self.monthname[month], year, hh, mm, ss)
  536. return s
  537. weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
  538. monthname = [None,
  539. 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
  540. 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
  541. def address_string(self):
  542. """Return the client address."""
  543. return self.client_address[0]
  544. # Essentially static class variables
  545. # The version of the HTTP protocol we support.
  546. # Set this to HTTP/1.1 to enable automatic keepalive
  547. protocol_version = "HTTP/1.0"
  548. # MessageClass used to parse headers
  549. MessageClass = http_client.HTTPMessage
  550. # hack to maintain backwards compatibility
  551. responses = {
  552. v: (v.phrase, v.description)
  553. for v in HTTPStatus.__members__.values()
  554. }
  555. class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
  556. """Simple HTTP request handler with GET and HEAD commands.
  557. This serves files from the current directory and any of its
  558. subdirectories. The MIME type for files is determined by
  559. calling the .guess_type() method.
  560. The GET and HEAD requests are identical except that the HEAD
  561. request omits the actual contents of the file.
  562. """
  563. server_version = "SimpleHTTP/" + __version__
  564. def do_GET(self):
  565. """Serve a GET request."""
  566. f = self.send_head()
  567. if f:
  568. try:
  569. self.copyfile(f, self.wfile)
  570. finally:
  571. f.close()
  572. def do_HEAD(self):
  573. """Serve a HEAD request."""
  574. f = self.send_head()
  575. if f:
  576. f.close()
  577. def send_head(self):
  578. """Common code for GET and HEAD commands.
  579. This sends the response code and MIME headers.
  580. Return value is either a file object (which has to be copied
  581. to the outputfile by the caller unless the command was HEAD,
  582. and must be closed by the caller under all circumstances), or
  583. None, in which case the caller has nothing further to do.
  584. """
  585. path = self.translate_path(self.path)
  586. f = None
  587. if os.path.isdir(path):
  588. parts = urllib.parse.urlsplit(self.path)
  589. if not parts.path.endswith('/'):
  590. # redirect browser - doing basically what apache does
  591. self.send_response(HTTPStatus.MOVED_PERMANENTLY)
  592. new_parts = (parts[0], parts[1], parts[2] + '/',
  593. parts[3], parts[4])
  594. new_url = urllib.parse.urlunsplit(new_parts)
  595. self.send_header("Location", new_url)
  596. self.end_headers()
  597. return None
  598. for index in "index.html", "index.htm":
  599. index = os.path.join(path, index)
  600. if os.path.exists(index):
  601. path = index
  602. break
  603. else:
  604. return self.list_directory(path)
  605. ctype = self.guess_type(path)
  606. try:
  607. f = open(path, 'rb')
  608. except OSError:
  609. self.send_error(HTTPStatus.NOT_FOUND, "File not found")
  610. return None
  611. try:
  612. self.send_response(HTTPStatus.OK)
  613. self.send_header("Content-type", ctype)
  614. fs = os.fstat(f.fileno())
  615. self.send_header("Content-Length", str(fs[6]))
  616. self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
  617. self.end_headers()
  618. return f
  619. except:
  620. f.close()
  621. raise
  622. def list_directory(self, path):
  623. """Helper to produce a directory listing (absent index.html).
  624. Return value is either a file object, or None (indicating an
  625. error). In either case, the headers are sent, making the
  626. interface the same as for send_head().
  627. """
  628. try:
  629. list = os.listdir(path)
  630. except OSError:
  631. self.send_error(
  632. HTTPStatus.NOT_FOUND,
  633. "No permission to list directory")
  634. return None
  635. list.sort(key=lambda a: a.lower())
  636. r = []
  637. try:
  638. displaypath = urllib.parse.unquote(self.path,
  639. errors='surrogatepass')
  640. except UnicodeDecodeError:
  641. displaypath = urllib.parse.unquote(path)
  642. displaypath = html.escape(displaypath, quote=False)
  643. enc = sys.getfilesystemencoding()
  644. title = 'Directory listing for %s' % displaypath
  645. r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
  646. '"http://www.w3.org/TR/html4/strict.dtd">')
  647. r.append('<html>\n<head>')
  648. r.append('<meta http-equiv="Content-Type" '
  649. 'content="text/html; charset=%s">' % enc)
  650. r.append('<title>%s</title>\n</head>' % title)
  651. r.append('<body>\n<h1>%s</h1>' % title)
  652. r.append('<hr>\n<ul>')
  653. for name in list:
  654. fullname = os.path.join(path, name)
  655. displayname = linkname = name
  656. # Append / for directories or @ for symbolic links
  657. if os.path.isdir(fullname):
  658. displayname = name + "/"
  659. linkname = name + "/"
  660. if os.path.islink(fullname):
  661. displayname = name + "@"
  662. # Note: a link to a directory displays with @ and links with /
  663. r.append('<li><a href="%s">%s</a></li>'
  664. % (urllib.parse.quote(linkname,
  665. errors='surrogatepass'),
  666. html.escape(displayname, quote=False)))
  667. r.append('</ul>\n<hr>\n</body>\n</html>\n')
  668. encoded = '\n'.join(r).encode(enc, 'surrogateescape')
  669. f = io.BytesIO()
  670. f.write(encoded)
  671. f.seek(0)
  672. self.send_response(HTTPStatus.OK)
  673. self.send_header("Content-type", "text/html; charset=%s" % enc)
  674. self.send_header("Content-Length", str(len(encoded)))
  675. self.end_headers()
  676. return f
  677. def translate_path(self, path):
  678. """Translate a /-separated PATH to the local filename syntax.
  679. Components that mean special things to the local file system
  680. (e.g. drive or directory names) are ignored. (XXX They should
  681. probably be diagnosed.)
  682. """
  683. # abandon query parameters
  684. path = path.split('?',1)[0]
  685. path = path.split('#',1)[0]
  686. # Don't forget explicit trailing slash when normalizing. Issue17324
  687. trailing_slash = path.rstrip().endswith('/')
  688. try:
  689. path = urllib.parse.unquote(path, errors='surrogatepass')
  690. except UnicodeDecodeError:
  691. path = urllib.parse.unquote(path)
  692. path = posixpath.normpath(path)
  693. words = path.split('/')
  694. words = filter(None, words)
  695. path = os.getcwd()
  696. for word in words:
  697. if os.path.dirname(word) or word in (os.curdir, os.pardir):
  698. # Ignore components that are not a simple file/directory name
  699. continue
  700. path = os.path.join(path, word)
  701. if trailing_slash:
  702. path += '/'
  703. return path
  704. def copyfile(self, source, outputfile):
  705. """Copy all data between two file objects.
  706. The SOURCE argument is a file object open for reading
  707. (or anything with a read() method) and the DESTINATION
  708. argument is a file object open for writing (or
  709. anything with a write() method).
  710. The only reason for overriding this would be to change
  711. the block size or perhaps to replace newlines by CRLF
  712. -- note however that this the default server uses this
  713. to copy binary data as well.
  714. """
  715. shutil.copyfileobj(source, outputfile)
  716. def guess_type(self, path):
  717. """Guess the type of a file.
  718. Argument is a PATH (a filename).
  719. Return value is a string of the form type/subtype,
  720. usable for a MIME Content-type header.
  721. The default implementation looks the file's extension
  722. up in the table self.extensions_map, using application/octet-stream
  723. as a default; however it would be permissible (if
  724. slow) to look inside the data to make a better guess.
  725. """
  726. base, ext = posixpath.splitext(path)
  727. if ext in self.extensions_map:
  728. return self.extensions_map[ext]
  729. ext = ext.lower()
  730. if ext in self.extensions_map:
  731. return self.extensions_map[ext]
  732. else:
  733. return self.extensions_map['']
  # Make sure the mimetypes tables are populated (this also tries to
  # read the system mime.types files) before copying them.
  if not mimetypes.inited:
      mimetypes.init()

  # Extension -> MIME type table consulted through self.extensions_map:
  # a private copy of the mimetypes table plus a few local overrides.
  extensions_map = {
      **mimetypes.types_map,
      '': 'application/octet-stream',  # Default
      '.py': 'text/plain',
      '.c': 'text/plain',
      '.h': 'text/plain',
  }
  743. # Utilities for CGIHTTPRequestHandler
  744. def _url_collapse_path(path):
  745. """
  746. Given a URL path, remove extra '/'s and '.' path elements and collapse
  747. any '..' references and returns a collapsed path.
  748. Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
  749. The utility of this function is limited to is_cgi method and helps
  750. preventing some security attacks.
  751. Returns: The reconstituted URL, which will always start with a '/'.
  752. Raises: IndexError if too many '..' occur within the path.
  753. """
  754. # Query component should not be involved.
  755. path, _, query = path.partition('?')
  756. path = urllib.parse.unquote(path)
  757. # Similar to os.path.split(os.path.normpath(path)) but specific to URL
  758. # path semantics rather than local operating system semantics.
  759. path_parts = path.split('/')
  760. head_parts = []
  761. for part in path_parts[:-1]:
  762. if part == '..':
  763. head_parts.pop() # IndexError if more '..' than prior parts
  764. elif part and part != '.':
  765. head_parts.append( part )
  766. if path_parts:
  767. tail_part = path_parts.pop()
  768. if tail_part:
  769. if tail_part == '..':
  770. head_parts.pop()
  771. tail_part = ''
  772. elif tail_part == '.':
  773. tail_part = ''
  774. else:
  775. tail_part = ''
  776. if query:
  777. tail_part = '?'.join((tail_part, query))
  778. splitpath = ('/' + '/'.join(head_parts), tail_part)
  779. collapsed_path = "/".join(splitpath)
  780. return collapsed_path
  # Cached result of nobody_uid(); stays None until the first lookup.
  nobody = None


  def nobody_uid():
      """Internal routine to get nobody's uid.

      The value is stored in the module-level ``nobody`` variable and
      reused on later calls.  Returns -1 when the pwd module cannot be
      imported.  When there is no 'nobody' account, one more than the
      largest uid in the password database is used instead.
      """
      global nobody
      if not nobody:
          try:
              import pwd
          except ImportError:
              return -1
          try:
              nobody = pwd.getpwnam('nobody').pw_uid
          except KeyError:
              nobody = 1 + max(entry.pw_uid for entry in pwd.getpwall())
      return nobody
  def executable(path):
      """Report whether os.access() grants execute permission on *path*."""
      may_execute = os.access(path, os.X_OK)
      return may_execute
  class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
      """Complete HTTP server with GET, HEAD and POST commands.

      GET and HEAD also support running CGI scripts.

      The POST command is *only* implemented for CGI scripts.
      """

      # Determine platform specifics
      have_fork = hasattr(os, 'fork')

      # Make rfile unbuffered -- we need to read one line and then pass
      # the rest to a subprocess, so we can't use buffered input.
      rbufsize = 0

      def do_POST(self):
          """Serve a POST request.

          This is only implemented for CGI scripts; any other target is
          answered with a NOT_IMPLEMENTED error.
          """
          if self.is_cgi():
              self.run_cgi()
          else:
              self.send_error(
                  HTTPStatus.NOT_IMPLEMENTED,
                  "Can only POST to CGI scripts")

      def send_head(self):
          """Version of send_head that supports CGI scripts.

          CGI requests are handed to run_cgi(); everything else is
          delegated to SimpleHTTPRequestHandler.send_head().
          """
          if self.is_cgi():
              return self.run_cgi()
          else:
              return SimpleHTTPRequestHandler.send_head(self)

      def is_cgi(self):
          """Test whether self.path corresponds to a CGI script.

          Returns True and updates the cgi_info attribute to the tuple
          (dir, rest) if self.path requires running a CGI script.
          Returns False otherwise.

          If any exception is raised, the caller should assume that
          self.path was rejected as invalid and act accordingly.

          The default implementation tests whether the normalized url
          path begins with one of the strings in self.cgi_directories
          (and the next character is a '/' or the end of the string).
          """
          collapsed_path = _url_collapse_path(self.path)
          # Split at the first '/' after the leading one.
          dir_sep = collapsed_path.find('/', 1)
          head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
          if head in self.cgi_directories:
              self.cgi_info = head, tail
              return True
          return False

      # URL path prefixes that is_cgi() accepts as CGI directories.
      cgi_directories = ['/cgi-bin', '/htbin']

      def is_executable(self, path):
          """Test whether argument path is an executable file."""
          return executable(path)

      def is_python(self, path):
          """Test whether argument path is a Python script."""
          head, tail = os.path.splitext(path)
          return tail.lower() in (".py", ".pyw")

      def run_cgi(self):
          """Execute the CGI script described by self.cgi_info.

          A NOT_FOUND or FORBIDDEN error is sent (and nothing is run) when
          the script does not exist, is not a plain file, or is not
          executable.  Otherwise the CGI environment is assembled, the OK
          status line is sent, and the script is run: through
          fork()/execve() where os.fork exists, through subprocess
          elsewhere.
          """
          dir, rest = self.cgi_info
          path = dir + '/' + rest
          # Extend the script directory as long as the next path component
          # translates to an existing directory.
          i = path.find('/', len(dir)+1)
          while i >= 0:
              nextdir = path[:i]
              nextrest = path[i+1:]

              scriptdir = self.translate_path(nextdir)
              if os.path.isdir(scriptdir):
                  dir, rest = nextdir, nextrest
                  i = path.find('/', len(dir)+1)
              else:
                  break

          # find an explicit query string, if present.
          rest, _, query = rest.partition('?')

          # dissect the part after the directory name into a script name &
          # a possible additional path, to be stored in PATH_INFO.
          i = rest.find('/')
          if i >= 0:
              script, rest = rest[:i], rest[i:]
          else:
              script, rest = rest, ''

          scriptname = dir + '/' + script
          scriptfile = self.translate_path(scriptname)
          if not os.path.exists(scriptfile):
              self.send_error(
                  HTTPStatus.NOT_FOUND,
                  "No such CGI script (%r)" % scriptname)
              return
          if not os.path.isfile(scriptfile):
              self.send_error(
                  HTTPStatus.FORBIDDEN,
                  "CGI script is not a plain file (%r)" % scriptname)
              return
          ispy = self.is_python(scriptname)
          # Without fork, Python scripts are started through the
          # interpreter and therefore need no execute permission.
          if self.have_fork or not ispy:
              if not self.is_executable(scriptfile):
                  self.send_error(
                      HTTPStatus.FORBIDDEN,
                      "CGI script is not executable (%r)" % scriptname)
                  return

          # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
          # XXX Much of the following could be prepared ahead of time!
          env = copy.deepcopy(os.environ)
          env['SERVER_SOFTWARE'] = self.version_string()
          env['SERVER_NAME'] = self.server.server_name
          env['GATEWAY_INTERFACE'] = 'CGI/1.1'
          env['SERVER_PROTOCOL'] = self.protocol_version
          env['SERVER_PORT'] = str(self.server.server_port)
          env['REQUEST_METHOD'] = self.command
          uqrest = urllib.parse.unquote(rest)
          env['PATH_INFO'] = uqrest
          env['PATH_TRANSLATED'] = self.translate_path(uqrest)
          env['SCRIPT_NAME'] = scriptname
          if query:
              env['QUERY_STRING'] = query
          env['REMOTE_ADDR'] = self.client_address[0]
          authorization = self.headers.get("authorization")
          if authorization:
              authorization = authorization.split()
              if len(authorization) == 2:
                  import base64
                  import binascii
                  env['AUTH_TYPE'] = authorization[0]
                  if authorization[0].lower() == "basic":
                      try:
                          authorization = authorization[1].encode('ascii')
                          authorization = base64.decodebytes(authorization).\
                                          decode('ascii')
                      except (binascii.Error, UnicodeError):
                          pass
                      else:
                          # Only the first colon separates the user id
                          # from the password; the password itself may
                          # contain further colons.
                          user, colon, _password = authorization.partition(':')
                          if colon:
                              env['REMOTE_USER'] = user
          # XXX REMOTE_IDENT
          if self.headers.get('content-type') is None:
              env['CONTENT_TYPE'] = self.headers.get_content_type()
          else:
              env['CONTENT_TYPE'] = self.headers['content-type']
          length = self.headers.get('content-length')
          if length:
              env['CONTENT_LENGTH'] = length
          referer = self.headers.get('referer')
          if referer:
              env['HTTP_REFERER'] = referer
          # Collect the Accept header values with get_all(), as is done
          # for cookies below: getallmatchingheaders() matches nothing on
          # Python 3, which left HTTP_ACCEPT permanently empty.
          accept = self.headers.get_all('accept', ())
          env['HTTP_ACCEPT'] = ','.join(accept)
          ua = self.headers.get('user-agent')
          if ua:
              env['HTTP_USER_AGENT'] = ua
          co = filter(None, self.headers.get_all('cookie', []))
          cookie_str = ', '.join(co)
          if cookie_str:
              env['HTTP_COOKIE'] = cookie_str
          # XXX Other HTTP_* headers
          # Since we're setting the env in the parent, provide empty
          # values to override previously set values
          for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                    'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
              env.setdefault(k, "")

          self.send_response(HTTPStatus.OK, "Script output follows")
          self.flush_headers()

          decoded_query = query.replace('+', ' ')

          if self.have_fork:
              # Unix -- fork as we should
              args = [script]
              if '=' not in decoded_query:
                  args.append(decoded_query)
              nobody = nobody_uid()
              self.wfile.flush()  # Always flush before forking
              pid = os.fork()
              if pid != 0:
                  # Parent
                  pid, sts = os.waitpid(pid, 0)
                  # throw away additional data [see bug #427345]
                  while select.select([self.rfile], [], [], 0)[0]:
                      if not self.rfile.read(1):
                          break
                  if sts:
                      self.log_error("CGI script exit status %#x", sts)
                  return
              # Child
              try:
                  try:
                      os.setuid(nobody)
                  except OSError:
                      pass
                  # Wire the request/response streams to the script's
                  # stdin and stdout before replacing the process image.
                  os.dup2(self.rfile.fileno(), 0)
                  os.dup2(self.wfile.fileno(), 1)
                  os.execve(scriptfile, args, env)
              except:
                  # Whatever went wrong, report it and terminate the child
                  # right away instead of unwinding into the server code.
                  self.server.handle_error(self.request, self.client_address)
                  os._exit(127)

          else:
              # Non-Unix -- use subprocess
              cmdline = [scriptfile]
              if self.is_python(scriptfile):
                  interp = sys.executable
                  if interp.lower().endswith("w.exe"):
                      # On Windows, use python.exe, not pythonw.exe
                      interp = interp[:-5] + interp[-4:]
                  cmdline = [interp, '-u'] + cmdline
              if '=' not in query:
                  cmdline.append(query)
              self.log_message("command: %s", subprocess.list2cmdline(cmdline))
              try:
                  nbytes = int(length)
              except (TypeError, ValueError):
                  nbytes = 0
              p = subprocess.Popen(cmdline,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   env=env
                                   )
              # Only a POST with a positive Content-Length has a body to
              # forward to the script.
              if self.command.lower() == "post" and nbytes > 0:
                  data = self.rfile.read(nbytes)
              else:
                  data = None
              # throw away additional data [see bug #427345]
              while select.select([self.rfile._sock], [], [], 0)[0]:
                  if not self.rfile._sock.recv(1):
                      break
              stdout, stderr = p.communicate(data)
              self.wfile.write(stdout)
              if stderr:
                  self.log_error('%s', stderr)
              p.stderr.close()
              p.stdout.close()
              status = p.returncode
              if status:
                  self.log_error("CGI script exit status %#x", status)
              else:
                  self.log_message("CGI script exited OK")
  def test(HandlerClass=BaseHTTPRequestHandler,
           ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""):
      """Run an HTTP server with the given handler class until interrupted.

      The server is created from ServerClass on (bind, port) -- port 8000
      unless the port argument says otherwise -- after
      HandlerClass.protocol_version has been set to ``protocol``.  The
      bound address is printed, then requests are served until a
      KeyboardInterrupt arrives, at which point the process exits with
      status 0.
      """
      HandlerClass.protocol_version = protocol
      with ServerClass((bind, port), HandlerClass) as httpd:
          # Report the address the socket is actually bound to.
          bound = httpd.socket.getsockname()
          print(
              "Serving HTTP on {host} port {port} (http://{host}:{port}/) ..."
              .format(host=bound[0], port=bound[1])
          )
          try:
              httpd.serve_forever()
          except KeyboardInterrupt:
              print("\nKeyboard interrupt received, exiting.")
              sys.exit(0)
  if __name__ == '__main__':
      # Command-line entry point: parse the options, choose the handler
      # class and hand both to test().
      parser = argparse.ArgumentParser()
      parser.add_argument(
          '--cgi', action='store_true',
          help='Run as CGI Server')
      parser.add_argument(
          '--bind', '-b', default='', metavar='ADDRESS',
          help='Specify alternate bind address '
               '[default: all interfaces]')
      parser.add_argument(
          'port', action='store',
          default=8000, type=int,
          nargs='?',
          help='Specify alternate port [default: 8000]')
      args = parser.parse_args()
      # --cgi selects the CGI-capable handler; plain file serving otherwise.
      handler_class = (CGIHTTPRequestHandler if args.cgi
                       else SimpleHTTPRequestHandler)
      test(HandlerClass=handler_class, port=args.port, bind=args.bind)