Author: tmckay
Date: 2011-08-05 20:49:12 +0000 (Fri, 05 Aug 2011)
New Revision: 4909

Modified:
   trunk/sage/python/sage/util.py
Log:
Change URL parsing routines to use regular expressions and allow path, etc.
Modified: trunk/sage/python/sage/util.py =================================================================== --- trunk/sage/python/sage/util.py 2011-08-05 18:59:48 UTC (rev 4908) +++ trunk/sage/python/sage/util.py 2011-08-05 20:49:12 UTC (rev 4909) @@ -1,5 +1,8 @@ from time import time, sleep from threading import Thread, Lock +import re +import copy +import string
class MethodResult(object): ''' @@ -201,78 +204,81 @@ ''' raise Exception("Not implemented")
-def host_port(hostname): - ''' - Returns a tuple containing 'host' and 'port' strings from hostname. +class sage_URL(object): + def __init__(self, scheme, user, password, host, port, path): + self.scheme = scheme + self.user = user + self.password = password + self.host = host + self.port = port + self.path = path
- Strings are split at the first colon to produce host and port strings. - A string containing only digits will result in a tuple with the host - value set to None and the port value set to the entire string. A string - containing non-digits but no colon will result in a tuple with the port - value set to None and the host value set to the entire string. - ''' - assert type(hostname) in (str, unicode) - import string + def __repr__(self): + return "sage_URL(%r)" % str(self)
- host = None - port = None + def __str__(self): + s = "" + if self.scheme: + s += "%s://" % self.scheme + if self.user: + s += self.user + if self.password: + s += "/%s" % self.password + s += "@" + s += self.host + if self.port: + s += ":%s" % self.port + if self.path: + s += "/%s" % self.path + return s
- info = string.split(hostname, ":", maxsplit=1) - if len(info) == 1: - # All digits, assume it was just a port number - if info[0].isdigit(): - port = info[0] - else: - host = info[0] - else: - host = info[0] - port = info[1] - return host, port +def parse_URL(hoststring):
-def host_port_list(netlocs, default_port=None): - ''' - Parses a list of network locations and returns - a dictionary keyed by host containing sets of ports for each host. + RE = re.compile(r""" + # [ <scheme>:// ] [ <user> [ / <password> ] @] <host> [ :<port> ] [ <path> ] + ^ (?: ([^:/@]+)://)? (?: ([^:/@]+) (?: / ([^:/@]+) )? @)? ([^@:/]+) (?: :([0-9]+))? (?: / (.*))?$ + """, re.X)
- Uses sage.util.host_port() to parse each item in the list. + scheme = user = password = host = port = path = None + match = RE.match(hoststring) + if match is not None: + scheme, user, password, host, port, path = match.groups() + return sage_URL(scheme, user, password, host, port, path)
- netlocs -- comma-separated list of network locations. A network location - may have one of the following forms: 'host', 'host:port', or 'port'. - If the 'port' form is used, the 'host' value is assumed to be the last - host encountered in the list or "localhost" if no host has been - encountered. If the 'host' form is used, an entry for the host is made - in the dictionary with an port list. +def host_list(netlocs, default_scheme=None, default_port=None, default_path=None): + tokens = string.split(netlocs, ",")
- default_port -- port value for hosts in the dictionary which contain an - empty port set after 'netlocs' is fully parsed. Ignored if equal to None. - ''' - assert type(netlocs) in (str, unicode) - import string + hosts = dict() + last_url = None + last_port_set = False
- # A dictionary of sets of ports keyed - # by hostname - hosts = dict() - tokens = string.split(netlocs, ",") - lasthost = "localhost" - for name in tokens: - host, port = host_port(string.strip(name)) - if host is None: - host = lasthost + for loc in tokens: + url = None + loc = string.strip(loc) + if loc.isdigit(): + # Allow just a port number to be specified if the previous + # url explicitly set a port. Shorthand for port list. + # Copy all information from the previous token except port. + if last_url is not None and last_port_set: + url = copy.copy(last_url) + url.port = loc else: - lasthost = host - if host not in hosts: - if port is None: - hosts[host] = set() + url = parse_URL(loc) + if url.scheme is None: + url.scheme = default_scheme + if url.path is None: + url.path = default_path + if url.port is None: + url.port = str(default_port) + last_port_set = False else: - hosts[host] = set([port]) - elif port is not None: - hosts[host].add(port) + last_port_set = True + last_url = url + if url is not None and url.host is not None and url.port is not None: + if url.host not in hosts: + hosts[url.host] = list() + hosts[url.host].append(url) + + return hosts
- # Fill in default ports for hosts with no - # ports assigned - if default_port is not None: - for host, ports in hosts.iteritems(): - if len(ports) == 0: - ports.add(default_port) - return hosts
cumin-developers@lists.fedorahosted.org