2700 lines
97 KiB
Python
2700 lines
97 KiB
Python
|
"""HTML form handling for web clients.

ClientForm is a Python module for handling HTML forms on the client
side, useful for parsing HTML forms, filling them in and returning the
completed forms to the server.  It has developed from a port of Gisle
Aas' Perl module HTML::Form, from the libwww-perl library, but the
interface is not the same.

The most useful docstring is the one for HTMLForm.

RFC 1866: HTML 2.0
RFC 1867: Form-based File Upload in HTML
RFC 2388: Returning Values from Forms: multipart/form-data
HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
HTML 4.01 Specification, W3C Recommendation 24 December 1999


Copyright 2002-2003 John J. Lee <jjl@pobox.com>
Copyright 1998-2000 Gisle Aas.

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file COPYING included with
the distribution).

"""
|
||
|
|
||
|
# XXX
# Treat unknown controls as text controls? (this was a recent LWP
#  HTML::Form change)  I guess this is INPUT with no TYPE?  Check LWP
#  source and browser behaviour.
# Support for list item ids.  How to handle missing ids? (How do I deal
#  with duplicate OPTION labels ATM?  Can't remember...)
# Arrange things so can automatically PyPI-register with categories
#  without messing up 1.5.2 compatibility.
# Tests need work.
# Test single and multiple file upload some more on the web.
# Does file upload work when name is missing?  Sourceforge tracker form
#  doesn't like it.  Check standards, and test with Apache.  Test binary
#  upload with Apache.
# Add label support for CHECKBOX and RADIO.
# Better docs.
# Deal with character sets properly.  Not sure what the issues are here.
#  I don't *think* any encoding of control names, filenames or data is
#  necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
#  doesn't seem to do it.
# Add charset parameter to Content-type headers?  How to find value??
# Get rid of MapBase, AList and MimeWriter.
# I'm not going to fix this unless somebody tells me what real servers
#  that want this encoding actually expect: If enctype is
#  application/x-www-form-urlencoded and there's a FILE control present.
#  Strictly, it should be 'name=data' (see HTML 4.01 spec., section
#  17.13.2), but I send "name=" ATM.  What about multiple file upload??
# Get rid of the two type-switches (for kind and click*).
# Remove single-selection code: can be special case of multi-selection,
#  with a few variations, I think.
# Factor out multiple-selection list code?  May not be easy.  Maybe like
#  this:

#         ListControl
#             ^
#             |       MultipleListControlMixin
#             |             ^
#        SelectControl     /
#             ^           /
#              \         /
#          MultiSelectControl


# Plan
# ----
# Maybe a 0.2.x, cleaned up a bit and with id support for list items?
# Not sure it's worth it, really.
#  Remove toggle methods.
#  Replace by_label with choice between value / id / label /
#   element contents (see discussion with Gisle about labels on
#   libwww-perl list).
#  ...what else?
# Work on DOMForm.
# XForms?  Don't know if there's a need here.
|
||
|
|
||
|
|
||
|
# Compatibility shims for interpreters that predate the boolean constants
# and the bool() builtin: each name is only defined here if looking it up
# raises NameError.
try: True
except NameError:
    True = 1
    False = 0

try: bool
except NameError:
    def bool(expr):
        # Map any truth value onto the True / False constants.
        if expr: return True
        else: return False
|
||
|
|
||
|
import sys, urllib, urllib2, types, string, mimetools, copy
|
||
|
from urlparse import urljoin
|
||
|
from cStringIO import StringIO
|
||
|
# UNICODE records whether this interpreter has a unicode string type.
# UnicodeType lives in the types module; it is not a module of its own, so
# a plain "import UnicodeType" always fails and leaves UNICODE false.
try:
    from types import UnicodeType
except ImportError:
    UNICODE = False
else:
    UNICODE = True
|
||
|
|
||
|
VERSION = "0.1.13"
|
||
|
|
||
|
CHUNK = 1024 # size of chunks fed to parser, in bytes
|
||
|
|
||
|
# This version of urlencode is from my Python 1.5.2 back-port of the
|
||
|
# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
|
||
|
# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
|
||
|
def urlencode(query,doseq=False,):
|
||
|
"""Encode a sequence of two-element tuples or dictionary into a URL query \
|
||
|
string.
|
||
|
|
||
|
If any values in the query arg are sequences and doseq is true, each
|
||
|
sequence element is converted to a separate parameter.
|
||
|
|
||
|
If the query arg is a sequence of two-element tuples, the order of the
|
||
|
parameters in the output will match the order of parameters in the
|
||
|
input.
|
||
|
"""
|
||
|
|
||
|
if hasattr(query,"items"):
|
||
|
# mapping objects
|
||
|
query = query.items()
|
||
|
else:
|
||
|
# it's a bother at times that strings and string-like objects are
|
||
|
# sequences...
|
||
|
try:
|
||
|
# non-sequence items should not work with len()
|
||
|
x = len(query)
|
||
|
# non-empty strings will fail this
|
||
|
if len(query) and type(query[0]) != types.TupleType:
|
||
|
raise TypeError()
|
||
|
# zero-length sequences of all types will get here and succeed,
|
||
|
# but that's a minor nit - since the original implementation
|
||
|
# allowed empty dicts that type of behavior probably should be
|
||
|
# preserved for consistency
|
||
|
except TypeError:
|
||
|
ty,va,tb = sys.exc_info()
|
||
|
raise TypeError("not a valid non-string sequence or mapping "
|
||
|
"object", tb)
|
||
|
|
||
|
l = []
|
||
|
if not doseq:
|
||
|
# preserve old behavior
|
||
|
for k, v in query:
|
||
|
k = urllib.quote_plus(str(k))
|
||
|
v = urllib.quote_plus(str(v))
|
||
|
l.append(k + '=' + v)
|
||
|
else:
|
||
|
for k, v in query:
|
||
|
k = urllib.quote_plus(str(k))
|
||
|
if type(v) == types.StringType:
|
||
|
v = urllib.quote_plus(v)
|
||
|
l.append(k + '=' + v)
|
||
|
elif UNICODE and type(v) == types.UnicodeType:
|
||
|
# is there a reasonable way to convert to ASCII?
|
||
|
# encode generates a string, but "replace" or "ignore"
|
||
|
# lose information and "strict" can raise UnicodeError
|
||
|
v = urllib.quote_plus(v.encode("ASCII","replace"))
|
||
|
l.append(k + '=' + v)
|
||
|
else:
|
||
|
try:
|
||
|
# is this a sufficient test for sequence-ness?
|
||
|
x = len(v)
|
||
|
except TypeError:
|
||
|
# not a sequence
|
||
|
v = urllib.quote_plus(str(v))
|
||
|
l.append(k + '=' + v)
|
||
|
else:
|
||
|
# loop over the sequence
|
||
|
for elt in v:
|
||
|
l.append(k + '=' + urllib.quote_plus(str(elt)))
|
||
|
return string.join(l, '&')
|
||
|
|
||
|
def startswith(string, initial):
    """Return true if the sequence string begins with the sequence initial."""
    n = len(initial)
    # A prefix longer than the whole string can never match.
    return n <= len(string) and string[:n] == initial
|
||
|
|
||
|
def issequence(x):
    """Return true if x can be indexed with an integer.

    An empty sequence counts (indexing it raises IndexError); objects that
    raise TypeError or KeyError for x[0] -- non-indexable objects and
    mappings without a 0 key -- do not.

    """
    try:
        x[0]
    except IndexError:
        # empty, but still a sequence
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
|
||
|
|
||
|
def isstringlike(x):
    """Return true if x can be concatenated with a string (x + "")."""
    try:
        x + ""
    except:
        # Deliberately catches everything: any failure at all means x
        # doesn't behave like a string.
        return False
    return True
|
||
|
|
||
|
|
||
|
# XXX don't really want to drag this along (MapBase, AList, MimeWriter)
|
||
|
|
||
|
class MapBase:
    """Mapping designed to be easily derived from.

    Subclass it and override __init__, __setitem__, __getitem__, __delitem__
    and keys.  Nothing else should need to be overridden, unlike UserDict.
    This significantly simplifies dictionary-like classes.

    Also different from UserDict in that it has a readonly flag, and can be
    updated (and initialised) with a sequence of pairs (key, value).

    While the readonly attribute is true, item assignment and deletion
    raise TypeError.

    """
    def __init__(self, init=None):
        self._data = {}
        self.readonly = False
        if init is not None: self.update(init)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        if not self.readonly:
            self._data[key] = item
        else:
            raise TypeError("object doesn't support item assignment")

    def __delitem__(self, key):
        if not self.readonly:
            del self._data[key]
        else:
            raise TypeError("object doesn't support item deletion")

    def keys(self):
        return self._data.keys()

    # now the internal workings, there should be no need to override these:

    def clear(self):
        for k in self.keys():
            del self[k]

    def __repr__(self):
        rep = []
        for k, v in self.items():
            rep.append("%s: %s" % (repr(k), repr(v)))
        return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"

    def copy(self):
        return copy.copy(self)

    def __cmp__(self, dict):
        # note: return value is *not* boolean
        # NOTE: only checks that every item of self is also in dict; keys
        # that dict has and self lacks are not detected.
        for k, v in self.items():
            if not (dict.has_key(k) and dict[k] == v):
                return 1  # different
        return 0  # the same

    def __len__(self):
        return len(self.keys())

    def values(self):
        r = []
        for k in self.keys():
            r.append(self[k])
        return r

    def items(self):
        # Built only from keys() and __getitem__, so subclasses that
        # override those get a consistent items() for free.
        r = []
        for k in self.keys():
            r.append((k, self[k]))
        return r

    def has_key(self, key):
        return key in self.keys()

    def update(self, map):
        """Add the items of map, which is a mapping or a sequence of pairs.

        TypeError is raised if any item is not a tuple.

        """
        if issequence(map) and not isstringlike(map):
            items = map
        else:
            items = map.items()
        for tup in items:
            if not isinstance(tup, types.TupleType):
                raise TypeError(
                    "MapBase.update requires a map or a sequence of pairs")
            k, v = tup
            self[k] = v

    def get(self, key, failobj=None):
        if key in self.keys():
            return self[key]
        else:
            return failobj

    def setdefault(self, key, failobj=None):
        if not self.has_key(key):
            self[key] = failobj
        return self[key]
|
||
|
|
||
|
|
||
|
class AList(MapBase):
    """Read-only ordered mapping.

    Built from a sequence of (key, value) pairs, whose order is preserved
    by keys, values and items.  Lookup is a linear search, so keys need
    not be hashable; if a key occurs more than once, the first occurrence
    wins.

    set_inverted(True) swaps the roles of keys and values, so the mapping
    can be used to look up keys by value.

    The readonly attribute starts out true, so item assignment and deletion
    raise TypeError.

    """
    # NOTE: the default list is never mutated (it is copied below).
    def __init__(self, seq=[]):
        self.readonly = True
        self._inverted = False
        self._data = list(seq[:])  # the original, uninverted pairs
        self._keys = []
        self._values = []
        for key, value in seq:
            self._keys.append(key)
            self._values.append(value)

    def set_inverted(self, inverted):
        """Select (true) or cancel (false) the value -> key view."""
        if inverted: wanted = True
        else: wanted = False
        if wanted != self._inverted:
            self._keys, self._values = self._values, self._keys
        self._inverted = wanted

    def __getitem__(self, key):
        try:
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        return self._values[i]

    def __delitem__(self, key):
        # Honour the readonly flag, as MapBase does.
        if self.readonly:
            raise TypeError("object doesn't support item deletion")
        try:
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        # The three lists are parallel: keep them in step.
        del self._keys[i]
        del self._values[i]
        del self._data[i]

    def keys(self): return list(self._keys[:])
    def values(self): return list(self._values[:])
    def items(self):
        data = self._data[:]
        if not self._inverted:
            return data
        else:
            # _data always holds the uninverted pairs, so flip each one
            newdata = []
            for k, v in data:
                newdata.append((v, k))
            return newdata
|
||
|
|
||
|
|
||
|
# This cut-n-pasted MimeWriter from standard library is here so can add
|
||
|
# to HTTP headers rather than message body when appropriate. It also uses
|
||
|
# \r\n in place of \n. This is nasty.
|
||
|
class MimeWriter:

    """Minimal writer for MIME messages, using \\r\\n line endings.

    A writer wraps an output file.  Headers added with addheader() are
    buffered until flushheaders() or one of the start*body() methods writes
    them out, so they may be added in any order; everything else is written
    straight to the file, so the parts of a message must be produced in the
    order in which they are to appear.

    Single-part use:

        w = MimeWriter(f)
        ...call w.addheader(key, value) 0 or more times...
        body = w.startbody(content_type)
        ...call body.write(data) for body data...

    Multipart use:

        w = MimeWriter(f)
        ...call w.addheader(key, value) 0 or more times...
        w.startmultipartbody(subtype)
        for each part:
            subwriter = w.nextpart()
            ...use the subwriter like a toplevel MimeWriter...
        w.lastpart()

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Notes:

    - startbody() and startmultipartbody() return the file passed to the
      constructor; don't rely on that, as it may change.  For a multipart
      body the file can be used to write a preamble.

    - flushheaders() writes out (and forgets) the headers accumulated so
      far; useful when no body is wanted at all.

    - The prefix argument of addheader() and start*body() says where the
      header goes: false appends it, true inserts it before the headers
      already buffered.  The default is to append for addheader() and to
      insert for start*body(), which use it to place the Content-type
      header.

    - If add_to_http_hdrs is true, the header is appended as a (key, value)
      pair to the http_hdrs list passed to the constructor instead of being
      written to the file.

    """

    def __init__(self, fp, http_hdrs=None):
        self._fp = fp
        self._http_hdrs = http_hdrs
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or add it to the HTTP headers list.

        prefix is ignored if add_to_http_hdrs is true.

        """
        lines = string.split(value, "\r\n")
        # drop empty lines at either end of the value
        while lines and not lines[-1]: del lines[-1]
        while lines and not lines[0]: del lines[0]
        if add_to_http_hdrs:
            # HTTP headers are kept on a single line
            self._http_hdrs.append((key, string.join(lines, "")))
            return
        # fold continuation lines: each starts with whitespace
        folded = lines[:1]
        for continuation in lines[1:]:
            folded.append(" " + string.strip(continuation))
        line = key + ": " + string.join(folded, "\r\n") + "\r\n"
        if prefix:
            self._headers.insert(0, line)
        else:
            self._headers.append(line)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=[], prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Write out the headers and return the file to write the body to.

        A Content-type header built from ctype and the (name, value) pairs
        in plist is added first, unless ctype or content_type is false.

        prefix is ignored if add_to_http_hdrs is true.

        """
        if content_type and ctype:
            params = []
            for name, value in plist:
                params.append(';\r\n %s=\"%s\"' % (name, value))
            self.addheader("Content-type", ctype + string.join(params, ""),
                           prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        # the blank line that separates headers from body
        if not add_to_http_hdrs: self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body; a boundary is chosen if none
        is given.  Returns the same file as startbody()."""
        if not boundary:
            boundary = mimetools.choose_boundary()
        self._boundary.append(boundary)
        return self.startbody("multipart/" + subtype,
                              [("boundary", boundary)] + plist,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the delimiter for the next part and return its writer."""
        delimiter = "--" + self._boundary[-1] + "\r\n"
        if not self._first_part:
            # terminate the previous part's body
            self._fp.write("\r\n")
        self._first_part = False
        self._fp.write(delimiter)
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing delimiter of the current multipart body."""
        if self._first_part:
            # no parts were written: emit one empty part
            self.nextpart()
        closing = self._boundary.pop()
        self._fp.write("\r\n--" + closing + "--\r\n")
|
||
|
|
||
|
|
||
|
# Exceptions defined by this module.  The first three derive from
# ValueError, so "except ValueError" catches all of them.
# NOTE(review): judging by their names they are raised by control / list
# item lookups and item-count checks -- the raising code is not in this
# part of the file; confirm there.
class ControlNotFoundError(ValueError): pass
class ItemNotFoundError(ValueError): pass
class ItemCountError(ValueError): pass

# Raised (through the form parser's error method) when the HTML being
# parsed is malformed, eg. nested FORMs or an OPTION outside of a SELECT.
class ParseError(Exception): pass
|
||
|
|
||
|
|
||
|
def ParseResponse(response, select_default=False, ignore_errors=False):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the base URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    ignore_errors: don't raise ParseError, and carry on regardless if the
     parser gets confused

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    Precisely what ignore_errors does isn't well-defined yet, so don't rely too
    much on the current behaviour -- if you want robustness, you're better off
    fixing the HTML before passing it to this function.

    """
    # ignore_errors must be passed on explicitly: otherwise ParseFile
    # falls back to its own default and the caller's choice is lost.
    return ParseFile(response, response.geturl(), select_default,
                     ignore_errors)
|
||
|
|
||
|
def ParseFile(file, base_uri, select_default=False, ignore_errors=False):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the base URI of the document

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    fp = _FORM_PARSER_CLASS(ignore_errors)
    while 1:
        data = file.read(CHUNK)
        # Only an empty read means end of file: some file-like objects
        # return fewer than CHUNK bytes while more data is still to come.
        if not data: break
        fp.feed(data)
    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        # a FORM without an ACTION submits to the document's own URI;
        # otherwise ACTION is resolved relative to it
        if action is None:
            action = base_uri
        else:
            action = urljoin(base_uri, action)
        form = HTMLForm(action, method, enctype, name, attrs)
        for control_type, control_name, control_attrs in controls:
            form.new_control(control_type, control_name, control_attrs,
                             select_default=select_default)
        forms.append(form)
    # controls are only fully initialised once fixup has been called
    for form in forms:
        form.fixup()
    return forms
|
||
|
|
||
|
|
||
|
class _AbstractFormParser:
|
||
|
"""forms attribute contains HTMLForm instances on completion."""
|
||
|
# pinched (and modified) from Moshe Zadka
|
||
|
def __init__(self, ignore_errors, entitydefs=None):
|
||
|
if entitydefs is not None:
|
||
|
self.entitydefs = entitydefs
|
||
|
self._ignore_errors = ignore_errors
|
||
|
self.forms = []
|
||
|
self._current_form = None
|
||
|
self._select = None
|
||
|
self._optgroup = None
|
||
|
self._option = None
|
||
|
self._textarea = None
|
||
|
|
||
|
def error(self, error):
|
||
|
if not self._ignore_errors: raise error
|
||
|
|
||
|
def start_form(self, attrs):
|
||
|
if self._current_form is not None:
|
||
|
self.error(ParseError("nested FORMs"))
|
||
|
name = None
|
||
|
action = None
|
||
|
enctype = "application/x-www-form-urlencoded"
|
||
|
method = "GET"
|
||
|
d = {}
|
||
|
for key, value in attrs:
|
||
|
if key == "name":
|
||
|
name = value
|
||
|
elif key == "action":
|
||
|
action = value
|
||
|
elif key == "method":
|
||
|
method = string.upper(value)
|
||
|
elif key == "enctype":
|
||
|
enctype = string.lower(value)
|
||
|
else:
|
||
|
d[key] = value
|
||
|
controls = []
|
||
|
self._current_form = (name, action, method, enctype), d, controls
|
||
|
|
||
|
def end_form(self):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("end of FORM before start"))
|
||
|
self.forms.append(self._current_form)
|
||
|
self._current_form = None
|
||
|
|
||
|
def start_select(self, attrs):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("start of SELECT before start of FORM"))
|
||
|
if self._select is not None:
|
||
|
self.error(ParseError("nested SELECTs"))
|
||
|
if self._textarea is not None:
|
||
|
self.error(ParseError("SELECT inside TEXTAREA"))
|
||
|
d = {}
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
|
||
|
self._select = d
|
||
|
|
||
|
self._append_select_control({"__select": d})
|
||
|
|
||
|
def end_select(self):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("end of SELECT before start of FORM"))
|
||
|
if self._select is None:
|
||
|
self.error(ParseError("end of SELECT before start"))
|
||
|
|
||
|
if self._option is not None:
|
||
|
self._end_option()
|
||
|
|
||
|
self._select = None
|
||
|
|
||
|
def start_optgroup(self, attrs):
|
||
|
if self._select is None:
|
||
|
self.error(ParseError("OPTGROUP outside of SELECT"))
|
||
|
d = {}
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
|
||
|
self._optgroup = d
|
||
|
|
||
|
def end_optgroup(self):
|
||
|
if self._optgroup is None:
|
||
|
self.error(ParseError("end of OPTGROUP before start"))
|
||
|
self._optgroup = None
|
||
|
|
||
|
def _start_option(self, attrs):
|
||
|
if self._select is None:
|
||
|
self.error(ParseError("OPTION outside of SELECT"))
|
||
|
if self._option is not None:
|
||
|
self._end_option()
|
||
|
|
||
|
d = {}
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
|
||
|
self._option = {}
|
||
|
self._option.update(d)
|
||
|
if (self._optgroup and self._optgroup.has_key("disabled") and
|
||
|
not self._option.has_key("disabled")):
|
||
|
self._option["disabled"] = None
|
||
|
|
||
|
def _end_option(self):
|
||
|
if self._option is None:
|
||
|
self.error(ParseError("end of OPTION before start"))
|
||
|
|
||
|
contents = string.strip(self._option.get("contents", ""))
|
||
|
#contents = string.strip(self._option["contents"])
|
||
|
self._option["contents"] = contents
|
||
|
if not self._option.has_key("value"):
|
||
|
self._option["value"] = contents
|
||
|
if not self._option.has_key("label"):
|
||
|
self._option["label"] = contents
|
||
|
# stuff dict of SELECT HTML attrs into a special private key
|
||
|
# (gets deleted again later)
|
||
|
self._option["__select"] = self._select
|
||
|
self._append_select_control(self._option)
|
||
|
self._option = None
|
||
|
|
||
|
def _append_select_control(self, attrs):
|
||
|
controls = self._current_form[2]
|
||
|
name = self._select.get("name")
|
||
|
controls.append(("select", name, attrs))
|
||
|
|
||
|
## def do_option(self, attrs):
|
||
|
## if self._select is None:
|
||
|
## self.error(ParseError("OPTION outside of SELECT"))
|
||
|
## d = {}
|
||
|
## for key, val in attrs:
|
||
|
## d[key] = val
|
||
|
|
||
|
## self._option = {}
|
||
|
## self._option.update(d)
|
||
|
## if (self._optgroup and self._optgroup.has_key("disabled") and
|
||
|
## not self._option.has_key("disabled")):
|
||
|
## self._option["disabled"] = None
|
||
|
|
||
|
def start_textarea(self, attrs):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("start of TEXTAREA before start of FORM"))
|
||
|
if self._textarea is not None:
|
||
|
self.error(ParseError("nested TEXTAREAs"))
|
||
|
if self._select is not None:
|
||
|
self.error(ParseError("TEXTAREA inside SELECT"))
|
||
|
d = {}
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
|
||
|
self._textarea = d
|
||
|
|
||
|
def end_textarea(self):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("end of TEXTAREA before start of FORM"))
|
||
|
if self._textarea is None:
|
||
|
self.error(ParseError("end of TEXTAREA before start"))
|
||
|
controls = self._current_form[2]
|
||
|
name = self._textarea.get("name")
|
||
|
controls.append(("textarea", name, self._textarea))
|
||
|
self._textarea = None
|
||
|
|
||
|
def handle_data(self, data):
|
||
|
if self._option is not None:
|
||
|
# self._option is a dictionary of the OPTION element's HTML
|
||
|
# attributes, but it has two special keys, one of which is the
|
||
|
# special "contents" key contains text between OPTION tags (the
|
||
|
# other is the "__select" key: see the end_option method)
|
||
|
map = self._option
|
||
|
key = "contents"
|
||
|
elif self._textarea is not None:
|
||
|
map = self._textarea
|
||
|
key = "value"
|
||
|
else:
|
||
|
return
|
||
|
|
||
|
if not map.has_key(key):
|
||
|
map[key] = data
|
||
|
else:
|
||
|
map[key] = map[key] + data
|
||
|
|
||
|
## def handle_data(self, data):
|
||
|
## if self._option is not None:
|
||
|
## contents = string.strip(data)
|
||
|
## controls = self._current_form[2]
|
||
|
## if not self._option.has_key("value"):
|
||
|
## self._option["value"] = contents
|
||
|
## if not self._option.has_key("label"):
|
||
|
## self._option["label"] = contents
|
||
|
## # self._option is a dictionary of the OPTION element's HTML
|
||
|
## # attributes, but it has two special keys:
|
||
|
## # 1. special "contents" key contains text between OPTION tags
|
||
|
## self._option["contents"] = contents
|
||
|
## # 2. stuff dict of SELECT HTML attrs into a special private key
|
||
|
## # (gets deleted again later)
|
||
|
## self._option["__select"] = self._select
|
||
|
## self._append_select_control(self._option)
|
||
|
## self._option = None
|
||
|
## elif self._textarea is not None:
|
||
|
## #self._textarea["value"] = data
|
||
|
## if self._textarea.get("value") is None:
|
||
|
## self._textarea["value"] = data
|
||
|
## else:
|
||
|
## self._textarea["value"] = self._textarea["value"] + data
|
||
|
|
||
|
def do_button(self, attrs):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("start of BUTTON before start of FORM"))
|
||
|
d = {}
|
||
|
d["type"] = "submit" # default
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
controls = self._current_form[2]
|
||
|
|
||
|
type = d["type"]
|
||
|
name = d.get("name")
|
||
|
# we don't want to lose information, so use a type string that
|
||
|
# doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
|
||
|
# eg. type for BUTTON/RESET is "resetbutton"
|
||
|
# (type for INPUT/RESET is "reset")
|
||
|
type = type+"button"
|
||
|
controls.append((type, name, d))
|
||
|
|
||
|
def do_input(self, attrs):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("start of INPUT before start of FORM"))
|
||
|
d = {}
|
||
|
d["type"] = "text" # default
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
controls = self._current_form[2]
|
||
|
|
||
|
type = d["type"]
|
||
|
name = d.get("name")
|
||
|
controls.append((type, name, d))
|
||
|
|
||
|
def do_isindex(self, attrs):
|
||
|
if self._current_form is None:
|
||
|
self.error(ParseError("start of ISINDEX before start of FORM"))
|
||
|
d = {}
|
||
|
for key, val in attrs:
|
||
|
d[key] = val
|
||
|
controls = self._current_form[2]
|
||
|
|
||
|
# isindex doesn't have type or name HTML attributes
|
||
|
controls.append(("isindex", None, d))
|
||
|
|
||
|
# use HTMLParser if we have it (it does XHTML), htmllib otherwise
|
||
|
try:
    import HTMLParser
except ImportError:
    import htmllib, formatter
    class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
        # This is still here for compatibility with Python 1.5.2.
        # It doesn't do the right thing with XHTML.
        def __init__(self, ignore_errors, entitydefs=None):
            htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        # Only the start of an OPTION is handled here; an OPTION is ended
        # by the next OPTION or by the end of its SELECT.
        def do_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

    _FORM_PARSER_CLASS = _FormParser
else:
    class _XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
        # thanks to Michael Howitz for this!
        def __init__(self, ignore_errors, entitydefs=None):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        # HTMLParser reports every tag through handle_starttag and
        # handle_endtag; dispatch to start_<tag> / do_<tag> / end_<tag>
        # methods in the way the sgmllib-based parser does.
        def handle_starttag(self, tag, attrs):
            try:
                method = getattr(self, 'start_' + tag)
            except AttributeError:
                try:
                    method = getattr(self, 'do_' + tag)
                except AttributeError:
                    pass  # unknown tag
                else:
                    method(attrs)
            else:
                method(attrs)

        def handle_endtag(self, tag):
            try:
                method = getattr(self, 'end_' + tag)
            except AttributeError:
                pass  # unknown tag
            else:
                method()

        # handle_charref, handle_entityref and default entitydefs are taken
        # from sgmllib
        def handle_charref(self, name):
            """Handle a numeric character reference (&#65; or &#x41;).

            name is the text between "&#" and ";".  References outside the
            range 0-255, and malformed ones, go to unknown_charref.

            """
            try:
                # HTMLParser passes hexadecimal references through with
                # their leading "x" or "X"
                if name[:1] in ("x", "X"):
                    n = int(name[1:], 16)
                else:
                    n = int(name)
            except ValueError:
                self.unknown_charref(name)
                return
            if not 0 <= n <= 255:
                self.unknown_charref(name)
                return
            self.handle_data(chr(n))

        # Definition of entities -- derived classes may override
        entitydefs = \
                {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

        def handle_entityref(self, name):
            table = self.entitydefs
            if name in table:
                self.handle_data(table[name])
            else:
                self.unknown_entityref(name)
                return

        # These methods would have passed through the ref intact if I'd thought
        # of it earlier, but since the old parser silently swallows unknown
        # refs, so does this new parser.
        def unknown_entityref(self, ref): pass
        def unknown_charref(self, ref): pass

    _FORM_PARSER_CLASS = _XHTMLCompatibleFormParser
|
||
|
|
||
|
|
||
|
class Control:
    """Abstract base class for the controls of an HTML form.

    An HTMLForm holds a sequence of Control objects and delegates much of
    its work to them, so most of the methods here are, in effect,
    documented by the HTMLForm docstrings.  Controls can be reached with
    HTMLForm.find_control or through the HTMLForm.controls attribute.

    Controls are normally built by the ParseFile / ParseResponse functions,
    in which case the rest of this paragraph can be ignored.  A Control is
    only fully initialised once its fixup method has been called.  Strictly
    this only matters for ListControl instances: those are assembled from
    ListControls that each hold a single item, so the initial value(s) can
    only be decided once the whole sequence is known.

    Which types and values may be assigned to the value attribute is
    decided by each subclass.  Assignment to value may raise TypeError,
    ValueError, or AttributeError (when the value should not be assigned
    to -- for example because the control is disabled).

    disabled: when true, the control is in the state browsers usually show
     by `greying out'.  Changing the value raises AttributeError, and the
     control is not `successful' in the W3C HTML 4 sense -- ie. it adds no
     data to the return value of the HTMLForm.click* methods.  Assign a
     false value to enable the control.
    readonly: when true, changing the value raises AttributeError.  Assign
     a false value to make the control writable.

    Every control has the disabled and readonly attributes, not just those
    whose HTML elements may carry attributes of the same names.

    A control is not successful if its name or value is None, if its value
    is an empty list, or if it is disabled.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        Abstract: always raises NotImplementedError here.

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Append this control to the end of form.controls."""
        controls = form.controls
        controls.append(self)

    def fixup(self):
        """Finish initialisation; the base implementation does nothing."""

    def __getattr__(self, name):
        """Abstract: subclasses supply attribute lookup."""
        raise NotImplementedError()

    def __setattr__(self, name, value):
        """Abstract: subclasses supply attribute assignment."""
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.

        Abstract: always raises NotImplementedError here.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw):
        """Write data for this control to a MimeWriter.

        One form-data part is written for every (key, value) pair returned
        by self.pairs().

        """
        # called by HTMLForm
        for key, data in self.pairs():
            part = mw.nextpart()
            disposition = 'form-data; name="%s"' % key
            part.addheader("Content-disposition", disposition, 1)
            body = part.startbody(prefix=0)
            body.write(data)

    def __str__(self):
        """Abstract: always raises NotImplementedError here."""
        raise NotImplementedError()
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class ScalarControl(Control):
    """Control holding one free-form value.

    The value is not restricted to a prescribed set of items.  Some
    ScalarControl subclasses refuse assignment to value altogether;
    the rest accept a single, string-like value.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs):
        """
        type: control type; stored lower-cased
        name: control name
        attrs: dictionary of the HTML attributes of the control's element;
         a copy is kept as self.attrs

        """
        # type and name are written straight into the instance dictionary
        # because __setattr__ rejects assignment to them.
        self.__dict__["name"] = name
        self.__dict__["type"] = string.lower(type)

        self.attrs = attrs.copy()
        self.id = attrs.get("id")
        self._value = attrs.get("value")
        # presence of the HTML attribute is what counts, not its value
        self.disabled = attrs.has_key("disabled")
        self.readonly = attrs.has_key("readonly")

        self._clicked = False

    def __getattr__(self, name):
        """Supply the computed value attribute (only reached for names not
        found by normal lookup)."""
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        """Guard assignment to value, name and type.

        value: must be string-like (else TypeError); AttributeError is
         raised if the control is readonly or disabled
        name, type: never assignable (AttributeError)
        anything else is stored as an ordinary instance attribute

        """
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return

        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def pairs(self):
        """Return [(name, value)], or [] if the control has no name, has
        no value, or is disabled."""
        key = self.name
        data = self.value
        successful = (key is not None and data is not None
                      and not self.disabled)
        if successful:
            return [(key, data)]
        return []

    def __str__(self):
        """Return a description like <TextControl(name=value) (readonly)>."""
        shown_name = self.name
        if shown_name is None:
            shown_name = "<None>"
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        for flag in ("disabled", "readonly"):
            if getattr(self, flag):
                flags.append(flag)
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class TextControl(ScalarControl):
    """Control for textual input.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/FILE
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs):
        """Arguments are as for ScalarControl.__init__.

        A missing HTML value attribute gives the empty string as initial
        value, and controls of type "hidden" start out readonly.

        """
        ScalarControl.__init__(self, type, name, attrs)
        if self._value is None:
            self._value = ""
        if self.type == "hidden":
            self.readonly = True
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class FileControl(ScalarControl):
    """File upload control (INPUT TYPE=FILE).

    A FileControl's value attribute is always None; the files to upload
    are registered with add_file instead.

    Additional public method: add_file

    """
    def __init__(self, type, name, attrs):
        """Arguments are as for ScalarControl.__init__; any HTML value
        attribute is discarded."""
        ScalarControl.__init__(self, type, name, attrs)
        # list of (file_object, content_type, filename), in add_file order
        self._upload_data = []
        self._value = None

    def __setattr__(self, name, value):
        """Refuse assignment to value, name and type; store anything else."""
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
            return
        raise AttributeError("%s attribute is readonly" % name)

    def add_file(self, file_object, content_type=None, filename=None):
        """Register a file to be uploaded with the form.

        file_object: object with a read method, read when the form data is
         written
        content_type: MIME type string; None means
         "application/octet-stream"
        filename: filename reported in the upload, or None for no filename

        Raises TypeError if any argument has the wrong type.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is None:
            content_type = "application/octet-stream"
        elif not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if not (filename is None or isstringlike(filename)):
            raise TypeError("filename must be None or string-like")
        self._upload_data.append((file_object, content_type, filename))

    def pairs(self):
        """Return [(name, "")], or [] if the control is unnamed or
        disabled."""
        # XXX should it be successful even if unnamed?
        unsuccessful = self.name is None or self.disabled
        if unsuccessful:
            return []
        return [(self.name, "")]

    def _write_mime_data(self, mw):
        """Write the registered files to a MimeWriter.

        No files: nothing is written.  One file: a single form-data part.
        Several files: a form-data part with a multipart/mixed body that
        has one "file" part per upload.

        """
        # called by HTMLForm
        uploads = self._upload_data
        if len(uploads) == 0:
            return

        if len(uploads) == 1:
            # single file
            source, ctype, fname = uploads[0]
            part = mw.nextpart()
            if fname:
                fname_param = '; filename="%s"' % fname
            else:
                fname_param = ''
            header = 'form-data; name="%s"%s' % (self.name, fname_param)
            part.addheader("Content-disposition", header, prefix=1)
            out = part.startbody(ctype, prefix=0)
            out.write(source.read())
            return

        # multiple files
        container = mw.nextpart()
        header = 'form-data; name="%s"' % self.name
        container.addheader("Content-disposition", header, prefix=1)
        container.startmultipartbody("mixed", prefix=0)
        for source, ctype, fname in uploads:
            part = container.nextpart()
            if fname:
                fname_param = '; filename="%s"' % fname
            else:
                fname_param = ''
            header = 'file%s' % fname_param
            part.addheader("Content-disposition", header, prefix=1)
            out = part.startbody(ctype, prefix=0)
            out.write(source.read())
        container.lastpart()

    def __str__(self):
        """Return a description listing the filenames added so far."""
        shown_name = self.name
        if shown_name is None:
            shown_name = "<None>"

        if self._upload_data:
            labels = []
            for upload in self._upload_data:
                fname = upload[2]
                if fname is not None:
                    labels.append(fname)
                else:
                    labels.append("<Unnamed file>")
            shown_value = string.join(labels, ", ")
        else:
            shown_value = "<No files added>"

        flags = []
        for flag in ("disabled", "readonly"):
            if getattr(self, flag):
                flags.append(flag)
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd one out among HTML form controls.  It is not really
    part of regular HTML forms at all, and predates them.  Only one ISINDEX
    is allowed per HTML document, and ISINDEX submission and regular form
    submission are mutually exclusive -- you submit either the form or the
    ISINDEX.

    Nevertheless, because ISINDEX controls may appear inside forms (which
    is probably bad HTML), ParseFile / ParseResponse include them in the
    HTMLForm instances they return.  The ISINDEX's value can be set like
    that of any other control (but since ISINDEX controls have no name,
    you'll need the type argument of set_value!).  On form submission the
    ISINDEX is not successful (ie., its presence sends no data to the
    server) unless the ISINDEX control itself is clicked, in which case the
    ISINDEX is submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  To submit one by hand:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs):
        """Arguments are as for ScalarControl.__init__; a missing HTML
        value attribute gives the empty string as initial value."""
        ScalarControl.__init__(self, type, name, attrs)
        if self._value is None:
            self._value = ""

    def pairs(self):
        """Return [] always: this control never adds pairs to form data."""
        return []

    def _click(self, form, coord, return_type):
        """Build the ISINDEX submission for this control.

        form: form whose action is the base of the submission URL
        coord: not used by this control
        return_type: "pairs" gives [], "request_data" gives the tuple
         (url, None, []), and anything else gives urllib2.Request(url)

        """
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        base = form.action
        query = "?" + urllib.quote_plus(self.value)
        url = urljoin(base, query)

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return (url, None, [])
        return urllib2.Request(url)

    def __str__(self):
        """Return a description like <IsindexControl(value) (disabled)>."""
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        for flag in ("disabled", "readonly"):
            if getattr(self, flag):
                flags.append(flag)
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown_value, suffix)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class IgnoreControl(ScalarControl):
    """Control of no interest for form submission.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    In HTML 4 terminology these controls are never successful (ie. they
    never cause any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    An IgnoreControl's value attribute is always None.

    """
    def __init__(self, type, name, attrs):
        """Arguments are as for ScalarControl.__init__; any HTML value
        attribute is discarded."""
        ScalarControl.__init__(self, type, name, attrs)
        self._value = None

    def __setattr__(self, name, value):
        """Refuse assignment to value, name and type; store anything else."""
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class ListControl(Control):
    """Control representing a sequence of items.

    The value attribute of a ListControl represents the selected list items
    in the control.

    ListControl implements both list controls that take a single value and
    those that take multiple values.

    ListControls accept sequence values only.  Some controls only accept
    sequences of length 0 or 1 (RADIO, and single-selection SELECT).
    In those cases, ItemCountError is raised if len(sequence) > 1.
    CHECKBOXes and multiple-selection SELECTs (those having the "multiple"
    HTML attribute) accept sequences of any length.

    Note the following mistake:

    control.value = some_value
    assert control.value == some_value    # not necessarily true

    The reason for this is that the value attribute always gives the list
    items in the order they were listed in the HTML.

    ListControl items can also be referred to by their labels instead of
    names.  Use the by_label argument, and the set_value_by_label,
    get_value_by_label methods.

    XXX RadioControl and CheckboxControl don't implement by_label yet.

    Note that, rather confusingly, though SELECT controls are represented
    in HTML by SELECT elements (which contain OPTION elements, representing
    individual list items), CHECKBOXes and RADIOs are not represented by
    *any* element.  Instead, those controls are represented by a collection
    of INPUT elements.  For example, this is a SELECT control, named
    "control1":

    <select name="control1">
     <option>foo</option>
     <option value="1">bar</option>
    </select>

    and this is a CHECKBOX control, named "control2":

    <input type="checkbox" name="control2" value="foo" id="cbe1">
    <input type="checkbox" name="control2" value="bar" id="cbe2">

    The id attribute of a CHECKBOX or RADIO ListControl is always that of
    its first element (for example, "cbe1" above).


    Additional read-only public attribute: multiple.


    ListControls are built up by the parser from their component items by
    creating one ListControl per item, consolidating them into a single
    master ListControl held by the HTMLForm:

    -User calls form.new_control(...)
    -Form creates Control, and calls control.add_to_form(self).
    -Control looks for a Control with the same name and type in the form,
     and if it finds one, merges itself with that control by calling
     control.merge_control(self).  The first Control added to the form, of
     a particular name and type, is the only one that survives in the
     form.
    -Form calls control.fixup for all its controls.  ListControls in the
     form know they can now safely pick their default values.

    To create a ListControl without an HTMLForm, use:

    control.merge_control(new_control)

    """
    def __init__(self, type, name, attrs={}, select_default=False,
                 called_as_base_class=False):
        """
        type: control type; stored lower-cased
        name: control name
        attrs: dictionary of HTML attributes of the element describing the
         (single) list item this instance starts out with
        select_default: for RADIO and multiple-selection SELECT controls,
         pick the first item as the default if no 'selected' HTML attribute
         is present
        called_as_base_class: subclasses pass a true value; otherwise
         NotImplementedError is raised, since ListControl is abstract

        """
        if not called_as_base_class:
            raise NotImplementedError()

        # type and name are written straight into the instance dictionary
        # because __setattr__ rejects assignment to them.
        self.__dict__["type"] = string.lower(type)
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = False
        self.readonly = False
        self.id = attrs.get("id")

        # attrs is only ever read and copied here, so the shared {} default
        # argument is never mutated.
        self._attrs = attrs.copy()
        # Each merged-in Control instance corresponds to a single list
        # item (see ListControl.__doc__); the two lists below hold one
        # entry per item and are extended by .merge_control().
        if attrs:
            self._attrs_list = [self._attrs]
            self._disabled_list = [self._attrs.has_key("disabled")]
        else:
            self._attrs_list = []
            self._disabled_list = []

        self._select_default = select_default
        self._clicked = False
        # Some list controls can have their default set only after all items
        # are known.  If so, self._value_is_set is false, and the self.fixup
        # method, called after all items have been added, sets the default.
        self._value_is_set = False

    def _value_from_label(self, label):
        """Translate an item label to an item name.

        Not supported by the base class: always raises NotImplementedError.

        """
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)

    def toggle(self, name, by_label=False):
        """Toggle the selected state of the named item."""
        return self._set_selected_state(name, 2, by_label)

    def set(self, selected, name, by_label=False):
        """Select (selected true) or deselect (selected false) the named
        item."""
        action = int(bool(selected))
        return self._set_selected_state(name, action, by_label)

    def _set_selected_state(self, name, action, by_label):
        """
        name: item name (or item label, if by_label is true)
        action:
         0: clear
         1: set
         2: toggle
        by_label: if true, name is first translated with _value_from_label

        Raises TypeError if name is not string-like, AttributeError if the
        control is disabled or readonly or the item to be selected is
        disabled, and ItemNotFoundError if there is no such item.

        """
        if not isstringlike(name):
            raise TypeError("item name must be string-like")
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError("no item named '%s'" % name)

        if self.multiple:
            # self._selected is a list of flags, parallel to self._menu
            if action == 2:
                action = not self._selected[i]
            if action and self._disabled_list[i]:
                raise AttributeError("item '%s' is disabled" % name)
            self._selected[i] = bool(action)
        else:
            # self._selected is the name of the selected item, or None
            if action == 2:
                if self._selected == name:
                    action = 0
                else:
                    action = 1
            if action == 0 and self._selected == name:
                self._selected = None
            elif action == 1:
                if self._disabled_list[i]:
                    raise AttributeError("item '%s' is disabled" % name)
                self._selected = name

    def toggle_single(self, by_label=False):
        """Toggle the only item of a single-item control."""
        self._set_single_selected_state(2, by_label)

    def set_single(self, selected, by_label=False):
        """Select or deselect the only item of a single-item control."""
        action = int(bool(selected))
        self._set_single_selected_state(action, by_label)

    def _set_single_selected_state(self, action, by_label):
        """Apply action (see _set_selected_state) to the control's only item.

        by_label: accepted for interface compatibility.  The item is
         identified by position, not by name or label, so no label lookup
         is needed.

        Raises ItemCountError unless the control has exactly one item.

        """
        if len(self._menu) != 1:
            raise ItemCountError("'%s' is not a single-item control" %
                                 self.name)

        # self._menu[0] is already an item *name*.  It must not be run
        # through _value_from_label (it is not a label), nor handed on with
        # by_label set (it would be translated again), so always pass
        # by_label=False.
        self._set_selected_state(self._menu[0], action, False)

    def get_item_disabled(self, name, by_label=False):
        """Get disabled state of named list item in a ListControl."""
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError()
        else:
            return self._disabled_list[i]

    def set_item_disabled(self, disabled, name, by_label=False):
        """Set disabled state of named list item in a ListControl.

        disabled: boolean disabled state

        """
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError()
        else:
            self._disabled_list[i] = bool(disabled)

    def set_all_items_disabled(self, disabled):
        """Set disabled state of all list items in a ListControl.

        disabled: boolean disabled state

        """
        for i in range(len(self._disabled_list)):
            self._disabled_list[i] = bool(disabled)

    def get_item_attrs(self, name, by_label=False):
        """Return dictionary of HTML attributes for a single ListControl item.

        The HTML element types that describe list items are: OPTION for SELECT
        controls, INPUT for the rest.  These elements have HTML attributes that
        you may occasionally want to know about -- for example, the "alt" HTML
        attribute gives a text string describing the item (graphical browsers
        usually display this as a tooltip).

        The returned dictionary maps HTML attribute names to values.  The names
        and values are taken from the original HTML.

        Note that for SELECT controls, the returned dictionary contains a
        special key "contents" -- see SelectControl.__doc__.

        """
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError()
        return self._attrs_list[i]

    def add_to_form(self, form):
        """Add this control to form, or merge it into the control of the
        same name and type if the form already has one."""
        try:
            control = form.find_control(self.name, self.type)
        except ControlNotFoundError:
            Control.add_to_form(self, form)
        else:
            control.merge_control(self)

    def merge_control(self, control):
        """Absorb the items of control (same class, same multiple-ness)
        into this control."""
        assert bool(control.multiple) == bool(self.multiple)
        assert isinstance(control, self.__class__)
        self._menu.extend(control._menu)
        self._attrs_list.extend(control._attrs_list)
        self._disabled_list.extend(control._disabled_list)
        if control.multiple:
            self._selected.extend(control._selected)
        else:
            # single selection: an explicitly selected later item replaces
            # whatever was selected before
            if control._value_is_set:
                self._selected = control._selected
        if control._value_is_set:
            self._value_is_set = True

    def fixup(self):
        """
        ListControls are built up from component list items (which are also
        ListControls) during parsing.  This method should be called after all
        items have been added.  See ListControl.__doc__ for the reason this is
        required.

        Abstract: always raises NotImplementedError here.

        """
        # Need to set default selection where no item was indicated as being
        # selected by the HTML:

        # CHECKBOX:
        #  Nothing should be selected.
        # SELECT/single, SELECT/multiple and RADIO:
        #  RFC 1866 (HTML 2.0): says first item should be selected.
        #  W3C HTML 4.01 Specification: says that client behaviour is
        #   undefined in this case.  For RADIO, exactly one must be selected,
        #   though which one is undefined.
        #  Both Netscape and Microsoft Internet Explorer (IE) choose first
        #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
        #   and Firebird 0.6) leave all items unselected for RADIO and
        #   SELECT/multiple.

        # Since both Netscape and IE all choose the first item for
        # SELECT/single, we do the same.  OTOH, both Netscape and IE
        # leave SELECT/multiple with nothing selected, in violation of RFC 1866
        # (but not in violation of the W3C HTML 4 standard); the same is true
        # of RADIO (which *is* in violation of the HTML 4 standard).  We follow
        # RFC 1866 if the select_default attribute is set, and Netscape and IE
        # otherwise.  RFC 1866 and HTML 4 are always violated insofar as you
        # can deselect all items in a RadioControl.

        raise NotImplementedError()

    def __getattr__(self, name):
        """Supply the computed value attribute: the list of selected item
        names, in menu order (only reached for names not found by normal
        lookup)."""
        if name == "value":
            menu = self._menu
            if self.multiple:
                values = []
                for i in range(len(menu)):
                    if self._selected[i]: values.append(menu[i])
                return values
            else:
                if self._selected is None: return []
                else: return [self._selected]
        else:
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Guard assignment to value, name, type and multiple.

        value: AttributeError if the control is disabled or readonly,
         otherwise the new selection is applied with _set_value
        name, type, multiple: never assignable (AttributeError)
        anything else is stored as an ordinary instance attribute

        """
        if name == "value":
            if self.disabled:
                raise AttributeError("control '%s' is disabled" % self.name)
            if self.readonly:
                raise AttributeError("control '%s' is readonly" % self.name)
            self._set_value(value)
        elif name in ("name", "type", "multiple"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def _set_value(self, value):
        """Apply a new selection, according to whether the control is
        multiple- or single-selection."""
        if self.multiple:
            self._multiple_set_value(value)
        else:
            self._single_set_value(value)

    def _single_set_value(self, value):
        """Set the selection of a single-selection control.

        value: sequence of length 0 (select nothing) or 1 (item name)

        Raises TypeError for None or a string, ItemCountError for longer
        sequences, ItemNotFoundError for an unknown item and AttributeError
        for a disabled item.

        """
        if value is None or isstringlike(value):
            raise TypeError("ListControl, must set a sequence")
        nr = len(value)
        if not (0 <= nr <= 1):
            raise ItemCountError("single selection list, must set sequence of "
                                 "length 0 or 1")

        if nr == 0:
            self._selected = None
        else:
            value = value[0]
            try:
                i = self._menu.index(value)
            except ValueError:
                raise ItemNotFoundError("no item named '%s'" %
                                        repr(value))
            if self._disabled_list[i]:
                raise AttributeError("item '%s' is disabled" % value)
            self._selected = value

    def _multiple_set_value(self, value):
        """Set the selection of a multiple-selection control.

        value: sequence of item names; exactly these items end up selected

        Raises TypeError for None or a string, ItemNotFoundError for an
        unknown item and AttributeError for a disabled item.  The current
        selection is left untouched if any error is raised.

        """
        if value is None or isstringlike(value):
            raise TypeError("ListControl, must set a sequence")

        # Build the new flags separately so a failure part-way through
        # does not leave a half-applied selection.
        selected = [False]*len(self._selected)
        menu = self._menu
        disabled_list = self._disabled_list

        for v in value:
            try:
                i = menu.index(v)
            except ValueError:
                raise ItemNotFoundError("no item named '%s'" % repr(v))
            if disabled_list[i]:
                # Report the offending item, not the whole sequence:
                # formatting the sequence gives a misleading message, and
                # for tuples makes the % operator itself raise TypeError.
                raise AttributeError("item '%s' is disabled" % v)
            selected[i] = True
        self._selected = selected

    def set_value_by_label(self, value):
        """Not supported by the base class: raises NotImplementedError."""
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)

    def get_value_by_label(self):
        """Not supported by the base class: raises NotImplementedError."""
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)

    def possible_items(self, by_label=False):
        """Return a copy of the list of item names.

        by_label is not supported by the base class (NotImplementedError).

        """
        if by_label:
            raise NotImplementedError(
                "control '%s' does not yet support by_label" % self.name)
        return copy.copy(self._menu)

    def pairs(self):
        """Return the (control name, item name) pairs of the selected
        items; [] if the control is disabled."""
        if self.disabled:
            return []

        if not self.multiple:
            name = self.name
            value = self._selected
            if name is None or value is None:
                return []
            return [(name, value)]
        else:
            control_name = self.name  # usually the name HTML attribute
            pairs = []
            for i in range(len(self._menu)):
                item_name = self._menu[i]  # usually the value HTML attribute
                if self._selected[i]:
                    pairs.append((control_name, item_name))
            return pairs

    def _item_str(self, i):
        """Return the display form of item i: a leading "*" marks a
        selected item, surrounding parentheses a disabled one."""
        item_name = self._menu[i]
        if self.multiple:
            if self._selected[i]:
                item_name = "*"+item_name
        else:
            if self._selected == item_name:
                item_name = "*"+item_name
        if self._disabled_list[i]:
            item_name = "(%s)" % item_name
        return item_name

    def __str__(self):
        """Return a description like <SelectControl(name=[*a, b, (c)])>."""
        name = self.name
        if name is None: name = "<None>"

        display = []
        for i in range(len(self._menu)):
            display.append(self._item_str(i))

        infos = []
        if self.disabled: infos.append("disabled")
        if self.readonly: infos.append("readonly")
        info = string.join(infos, ", ")
        if info: info = " (%s)" % info

        return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                                    name, string.join(display, ", "), info)
|
||
|
|
||
|
|
||
|
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Arguments are as for ListControl.__init__.

        The new control holds a single item, named after the HTML value
        attribute ("on" if absent), selected only if the HTML checked
        attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        # written straight into the instance dictionary because
        # __setattr__ rejects assignment to multiple
        self.__dict__["multiple"] = False
        item_name = attrs.get("value", "on")
        self._menu = [item_name]
        self._selected = None
        if attrs.has_key("checked"):
            self._selected = item_name
            self._value_is_set = True

    def fixup(self):
        """Pick the default selection once all items are known: the first
        item if select_default was requested, otherwise nothing."""
        if self._value_is_set:
            return
        # no item explicitly selected
        assert self._selected is None
        if self._select_default:
            self._selected = self._menu[0]
        self._value_is_set = True
|
||
|
|
||
|
|
||
|
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Arguments are as for ListControl.__init__.

        The new control holds a single item, named after the HTML value
        attribute ("on" if absent), selected only if the HTML checked
        attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        # written straight into the instance dictionary because
        # __setattr__ rejects assignment to multiple
        self.__dict__["multiple"] = True
        self._menu = [attrs.get("value", "on")]
        self._selected = [attrs.has_key("checked")]
        self._value_is_set = True

    def fixup(self):
        """Nothing to pick: if the HTML checked no item, none is selected."""
        # If no items were explicitly checked in HTML, that's how we must
        # leave it, so we have nothing to do here.
        assert self._value_is_set
|
||
|
|
||
|
|
||
|
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    values are "0", "1" and "2000" respectively.  Note that the value of the
    last OPTION in this example defaults to its contents, as specified by RFC
    1866, as do the labels of the second and third OPTIONs.

    The purpose of the *_by_label methods is that the OPTION labels are
    sometimes much more meaningful than the OPTION values, which can make for
    more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  The get_item_attrs method may be used as usual to get at the
    HTML attributes of the HTML elements corresponding to individual list items
    (for SELECT controls, these are OPTION elements).

    Another special case is that the attributes dictionaries returned by
    get_item_attrs have a special key "contents" which does not correspond to
    any real HTML attribute, but rather contains the contents of the OPTION
    element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.
    def __init__(self, type, name, attrs, select_default=False):
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        # Work on a copy so the caller's dictionary keeps its "__select" key.
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)

        # Built by fixup(); the by-label methods need it.
        self._label_map = None
        self.disabled = self.attrs.has_key("disabled")
        self.id = self.attrs.get("id")

        self._menu = []
        self._value_is_set = False
        # Multiple-selection: one boolean per menu item.
        # Single-selection: the selected item's value, or None.
        if self.attrs.has_key("multiple"):
            self.__dict__["multiple"] = True
            self._selected = []
        else:
            self.__dict__["multiple"] = False
            self._selected = None

        if attrs:  # OPTION item data was provided
            value = attrs["value"]
            self._menu.append(value)
            selected = attrs.has_key("selected")
            if selected:
                self._value_is_set = True
            if self.attrs.has_key("multiple"):
                self._selected.append(selected)
            elif selected:
                self._selected = value

    def _build_select_label_map(self):
        """Return an ordered mapping of labels to values.

        For example, if the HTML representation of the control is as given in
        SelectControl.__doc__, this function will return a mapping like:

        {"2002": "0", "2001": "1", "2000": "2000"}

        """
        alist = []
        for val in self._menu:
            attrs = self.get_item_attrs(val)
            alist.append((attrs["label"], val))
        return AList(alist)

    def _value_from_label(self, label):
        """Return the item value for label, or raise ItemNotFoundError."""
        try:
            return self._label_map[label]
        except KeyError:
            raise ItemNotFoundError("no item has label '%s'" % label)

    def fixup(self):
        """Apply default selection rules and build the label map."""
        if not self._value_is_set:
            # No item explicitly selected.
            if len(self._menu) > 0:
                if self.multiple:
                    if self._select_default:
                        self._selected[0] = True
                else:
                    assert self._selected is None
                    self._selected = self._menu[0]
            self._value_is_set = True
        self._label_map = self._build_select_label_map()

    def possible_items(self, by_label=False):
        """Return a list of all item values (or labels, if by_label)."""
        if not by_label:
            return copy.copy(self._menu)
        label_map = self._label_map
        # Temporarily look up labels by value; always restore the direction.
        label_map.set_inverted(True)
        try:
            labels = []
            for item_value in self._menu:
                labels.append(label_map[item_value])
        finally:
            label_map.set_inverted(False)
        return labels

    def set_value_by_label(self, value):
        """Set the control's value from a sequence of item labels.

        Raises TypeError if value is a string, AttributeError if the control
        is disabled or readonly, and ItemNotFoundError if a label does not
        belong to any item.

        """
        if isstringlike(value):
            raise TypeError("ListControl, must set a sequence, not a string")
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)

        # _value_from_label already turns an unknown label into
        # ItemNotFoundError, so the lookup is not repeated here.
        item_values = []
        for label in value:
            item_values.append(self._value_from_label(label))
        self._set_value(item_values)

    def get_value_by_label(self):
        """Return the labels of the currently selected items as a list."""
        if not self.multiple and self._selected is None:
            # A single-selection control may have nothing selected (fixup
            # leaves _selected as None for an empty menu).  Report that as an
            # empty list rather than looking None up in the label map.
            return []
        menu = self._menu
        # Temporarily look up labels by value; always restore the direction.
        self._label_map.set_inverted(True)
        try:
            if self.multiple:
                values = []
                for i in range(len(menu)):
                    if self._selected[i]:
                        values.append(self._label_map[menu[i]])
                return values
            else:
                return [self._label_map[self._selected]]
        finally:
            self._label_map.set_inverted(False)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    A submit control only contributes its name/value pair to the form data
    while it is being clicked.

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None: self.value = ""
        self.readonly = True

    def _click(self, form, coord, return_type):
        """Return the form's submission data as if this control was clicked.

        form: object whose _switch_click(return_type) builds the result
        coord: click coordinate, stored in _clicked for the duration of the
         call
        return_type: passed through unchanged to form._switch_click

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type)
        finally:
            # Reset even if building the request fails; otherwise a later
            # pairs() call would still report this control as clicked.
            self._clicked = False

    def pairs(self):
        """Return this control's pairs, or [] unless it is being clicked."""
        if not self._clicked:
            return []
        return ScalarControl.pairs(self)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    The value attribute of an ImageControl is always None.  Coordinates are
    specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs):
        # ScalarControl.__init__ is called directly: SubmitControl.__init__
        # assigns self.value, which __setattr__ below rejects.
        ScalarControl.__init__(self, type, name, attrs)
        self.__dict__["value"] = None

    def __setattr__(self, name, value):
        """Store the attribute, refusing writes to value, name and type."""
        if name in ("value", "name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value

    def pairs(self):
        """Return the "<name>.x" / "<name>.y" pairs for the clicked point.

        Returns [] when the control is disabled, is not being clicked, or
        has no name.

        """
        point = self._clicked
        if self.disabled or not point:
            return []
        control_name = self.name
        if control_name is None:
            return []
        x_pair = ("%s.x" % control_name, str(point[0]))
        y_pair = ("%s.y" % control_name, str(point[1]))
        return [x_pair, y_pair]
|
||
|
|
||
|
|
||
|
# aliases, just to make str(control) and str(form) clearer
|
||
|
class PasswordControl(TextControl):
    """TextControl under a distinct class name (used for type "password")."""


class HiddenControl(TextControl):
    """TextControl under a distinct class name (used for type "hidden")."""


class TextareaControl(TextControl):
    """TextControl under a distinct class name (used for type "textarea")."""


class SubmitButtonControl(SubmitControl):
    """SubmitControl under a distinct class name (type "submitbutton")."""
|
||
|
|
||
|
|
||
|
def is_listcontrol(control):
    """Return true if control is an instance of ListControl."""
    return isinstance(control, ListControl)
|
||
|
|
||
|
|
||
|
class HTMLForm:
|
||
|
"""Represents a single HTML <form> ... </form> element.
|
||
|
|
||
|
A form consists of a sequence of controls that usually have names, and
|
||
|
which can take on various values. The values of the various types of
|
||
|
controls represent variously: text, zero-, one- or many-of-many choices,
|
||
|
and files to be uploaded.
|
||
|
|
||
|
Forms can be filled in with data to be returned to the server, and then
|
||
|
submitted, using the click method to generate a request object suitable for
|
||
|
passing to urllib2.urlopen (or the click_request_data or click_pairs
|
||
|
methods if you're not using urllib2).
|
||
|
|
||
|
import ClientForm
|
||
|
forms = ClientForm.ParseFile(html, base_uri)
|
||
|
form = forms[0]
|
||
|
|
||
|
form["query"] = "Python"
|
||
|
form.set(True, "lots", "nr_results")
|
||
|
|
||
|
response = urllib2.urlopen(form.click())
|
||
|
|
||
|
Usually, HTMLForm instances are not created directly. Instead, the
|
||
|
ParseFile or ParseResponse factory functions are used. If you do construct
|
||
|
HTMLForm objects yourself, however, note that an HTMLForm instance is only
|
||
|
properly initialised after the fixup method has been called (ParseFile and
|
||
|
ParseResponse do this for you). See ListControl.__doc__ for the reason
|
||
|
this is required.
|
||
|
|
||
|
Indexing a form (form["control_name"]) returns the named Control's value
|
||
|
attribute. Assignment to a form index (form["control_name"] = something)
|
||
|
is equivalent to assignment to the named Control's value attribute. If you
|
||
|
need to be more specific than just supplying the control's name, use the
|
||
|
set_value and get_value methods.
|
||
|
|
||
|
ListControl values are lists of item names. The list item's name is the
|
||
|
value of the corresponding HTML element's "value" attribute.
|
||
|
|
||
|
Example:
|
||
|
|
||
|
<INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
|
||
|
<INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
|
||
|
|
||
|
defines a CHECKBOX control with name "cheeses" which has two items, named
|
||
|
"leicester" and "cheddar".
|
||
|
|
||
|
Another example:
|
||
|
|
||
|
<SELECT name="more_cheeses">
|
||
|
<OPTION>1</OPTION>
|
||
|
<OPTION value="2" label="CHEDDAR">cheddar</OPTION>
|
||
|
</SELECT>
|
||
|
|
||
|
defines a SELECT control with name "more_cheeses" which has two items,
|
||
|
named "1" and "2".
|
||
|
|
||
|
To set, clear or toggle individual list items, use the set and toggle
|
||
|
methods. To set the whole value, do as for any other control: use indexing
|
||
|
or the set_/get_value methods.
|
||
|
|
||
|
Example:
|
||
|
|
||
|
# select *only* the item named "cheddar"
|
||
|
form["cheeses"] = ["cheddar"]
|
||
|
# select "cheddar", leave other items unaffected
|
||
|
form.set(True, "cheddar", "cheeses")
|
||
|
|
||
|
Some controls (RADIO and SELECT without the multiple attribute) can only
|
||
|
have zero or one items selected at a time. Some controls (CHECKBOX and
|
||
|
SELECT with the multiple attribute) can have multiple items selected at a
|
||
|
time. To set the whole value of a multiple-selection ListControl, assign a
|
||
|
sequence to a form index:
|
||
|
|
||
|
form["cheeses"] = ["cheddar", "leicester"]
|
||
|
|
||
|
To check whether a control has an item, or whether an item is selected,
|
||
|
respectively:
|
||
|
|
||
|
"cheddar" in form.possible_items("cheeses")
|
||
|
"cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
|
||
|
|
||
|
Note that some items may be disabled (see below).
|
||
|
|
||
|
Note the following mistake:
|
||
|
|
||
|
form[control_name] = control_value
|
||
|
assert form[control_name] == control_value # not necessarily true
|
||
|
|
||
|
The reason for this is that form[control_name] always gives the list items
|
||
|
in the order they were listed in the HTML.
|
||
|
|
||
|
List items (hence list values, too) can be referred to in terms of list
|
||
|
item labels rather than list item names. Currently, this is only possible
|
||
|
for SELECT controls (this is a bug). To use this feature, use the by_label
|
||
|
arguments to the various HTMLForm methods. Note that it is *item* names
|
||
|
(hence ListControl values also), not *control* names, that can be referred
|
||
|
to by label.
|
||
|
|
||
|
The question of default values of OPTION contents, labels and values is
|
||
|
somewhat complicated: see SelectControl.__doc__ and
|
||
|
ListControl.get_item_attrs.__doc__ if you think you need to know.
|
||
|
|
||
|
Controls can be disabled or readonly. In either case, the control's value
|
||
|
cannot be changed until you clear those flags (using the methods on
|
||
|
HTMLForm). Disabled is the state typically represented by browsers by
|
||
|
`greying out' a control. Disabled controls are not `successful' -- they
|
||
|
don't cause data to get returned to the server. Readonly controls usually
|
||
|
appear in browsers as read-only text boxes. Readonly controls are
|
||
|
successful. List items can also be disabled. Attempts to select disabled
|
||
|
items (with form[name] = value, or using the ListControl.set method, for
|
||
|
example) fail. Attempts to clear disabled items are allowed.
|
||
|
|
||
|
If a lot of controls are readonly, it can be useful to do this:
|
||
|
|
||
|
form.set_all_readonly(False)
|
||
|
|
||
|
When you want to do several things with a single control, or want to do
|
||
|
less common things, like changing which controls and items are disabled,
|
||
|
you can get at a particular control:
|
||
|
|
||
|
control = form.find_control("cheeses")
|
||
|
control.set_item_disabled(False, "gruyere")
|
||
|
control.set(True, "gruyere")
|
||
|
|
||
|
Most methods on HTMLForm just delegate to the contained controls, so see
|
||
|
the docstrings of the various Control classes for further documentation.
|
||
|
Most of these delegating methods take name, type, kind, id and nr arguments
|
||
|
to specify the control to be operated on: see
|
||
|
HTMLForm.find_control.__doc__.
|
||
|
|
||
|
ControlNotFoundError (subclass of ValueError) is raised if the specified
|
||
|
control can't be found. This includes occasions where a non-ListControl
|
||
|
is found, but the method (set, for example) requires a ListControl.
|
||
|
ItemNotFoundError (subclass of ValueError) is raised if a list item can't
|
||
|
be found. ItemCountError (subclass of ValueError) is raised if an attempt
|
||
|
is made to select more than one item and the control doesn't allow that, or
|
||
|
set/get_single are called and the control contains more than one item.
|
||
|
AttributeError is raised if a control or item is readonly or disabled and
|
||
|
an attempt is made to alter its value.
|
||
|
|
||
|
XXX CheckboxControl and RadioControl don't yet support item access by label
|
||
|
|
||
|
Security note: Remember that any passwords you store in HTMLForm instances
|
||
|
will be saved to disk in the clear if you pickle them (directly or
|
||
|
indirectly). The simplest solution to this is to avoid pickling HTMLForm
|
||
|
objects. You could also pickle before filling in any password, or just set
|
||
|
the password to "" before pickling.
|
||
|
|
||
|
|
||
|
Public attributes:
|
||
|
|
||
|
action: full (absolute URI) form action
|
||
|
method: "GET" or "POST"
|
||
|
enctype: form transfer encoding MIME type
|
||
|
name: name of form (None if no name was specified)
|
||
|
attrs: dictionary mapping original HTML form attributes to their values
|
||
|
|
||
|
controls: list of Control instances; do not alter this list
|
||
|
(instead, call form.new_control to make a Control and add it to the
|
||
|
form, or control.add_to_form if you already have a Control instance)
|
||
|
|
||
|
|
||
|
|
||
|
Methods for form filling:
|
||
|
-------------------------
|
||
|
|
||
|
Most of these methods have very similar arguments. See
|
||
|
HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
|
||
|
arguments. See above for a description of by_label.
|
||
|
|
||
|
def find_control(self,
|
||
|
name=None, type=None, kind=None, id=None, predicate=None,
|
||
|
nr=None)
|
||
|
|
||
|
get_value(name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
set_value(value,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
|
||
|
set_all_readonly(readonly)
|
||
|
|
||
|
|
||
|
Methods applying only to ListControls:
|
||
|
|
||
|
possible_items(name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
|
||
|
set(selected, item_name,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
toggle(item_name,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
|
||
|
set_single(selected,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
toggle_single(name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False)
|
||
|
|
||
|
|
||
|
Method applying only to FileControls:
|
||
|
|
||
|
add_file(file_object,
|
||
|
content_type=None, filename=None,
|
||
|
name=None, id=None, nr=None)
|
||
|
|
||
|
|
||
|
Methods applying only to clickable controls:
|
||
|
|
||
|
click(name=None, type=None, id=None, nr=0, coord=(1,1))
|
||
|
click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
|
||
|
click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
|
||
|
|
||
|
"""
|
||
|
|
||
|
type2class = {
|
||
|
"text": TextControl,
|
||
|
"password": PasswordControl,
|
||
|
"hidden": HiddenControl,
|
||
|
"textarea": TextareaControl,
|
||
|
|
||
|
"isindex": IsindexControl,
|
||
|
|
||
|
"file": FileControl,
|
||
|
|
||
|
"button": IgnoreControl,
|
||
|
"buttonbutton": IgnoreControl,
|
||
|
"reset": IgnoreControl,
|
||
|
"resetbutton": IgnoreControl,
|
||
|
|
||
|
"submit": SubmitControl,
|
||
|
"submitbutton": SubmitButtonControl,
|
||
|
"image": ImageControl,
|
||
|
|
||
|
"radio": RadioControl,
|
||
|
"checkbox": CheckboxControl,
|
||
|
"select": SelectControl,
|
||
|
}
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Initialisation. Use ParseResponse / ParseFile instead.
|
||
|
|
||
|
def __init__(self, action, method="GET",
|
||
|
enctype="application/x-www-form-urlencoded",
|
||
|
name=None, attrs=None):
|
||
|
"""
|
||
|
In the usual case, use ParseResponse (or ParseFile) to create new
|
||
|
HTMLForm objects.
|
||
|
|
||
|
action: full (absolute URI) form action
|
||
|
method: "GET" or "POST"
|
||
|
enctype: form transfer encoding MIME type
|
||
|
name: name of form
|
||
|
attrs: dictionary mapping original HTML form attributes to their values
|
||
|
|
||
|
"""
|
||
|
self.action = action
|
||
|
self.method = method
|
||
|
self.enctype = enctype
|
||
|
self.name = name
|
||
|
if attrs is not None:
|
||
|
self.attrs = attrs.copy()
|
||
|
else:
|
||
|
self.attrs = {}
|
||
|
self.controls = []
|
||
|
|
||
|
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list); matched
     case-insensitively against the keys of type2class
    name: name of control
    attrs: HTML attributes of control (a copy is handed to the control)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, raise ValueError
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)

    """
    type = string.lower(type)
    try:
        klass = self.type2class[type]
    except KeyError:
        if not ignore_unknown:
            raise ValueError("Unknown control type '%s'" % type)
        klass = IgnoreControl

    attrs_copy = attrs.copy()
    # Only list controls take the select_default argument.
    if not issubclass(klass, ListControl):
        control = klass(type, name, attrs_copy)
    else:
        control = klass(type, name, attrs_copy, select_default)
    control.add_to_form(self)
|
||
|
|
||
|
def fixup(self):
|
||
|
"""Normalise form after all controls have been added.
|
||
|
|
||
|
This is usually called by ParseFile and ParseResponse. Don't call it
|
||
|
youself unless you're building your own Control instances.
|
||
|
|
||
|
This method should only be called once, after all controls have been
|
||
|
added to the form.
|
||
|
|
||
|
"""
|
||
|
for control in self.controls:
|
||
|
control.fixup()
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
def __str__(self):
    """Return a multi-line summary: method, action, enctype, controls.

    The first line holds the method, action and enctype; each control
    follows on its own line, and the whole text is wrapped in <...>.

    """
    lines = ["%s %s %s" % (self.method, self.action, self.enctype)]
    for ctl in self.controls:
        lines.append(" " + str(ctl))
    body = string.join(lines, "\n")
    return "<" + body + ">"
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Form-filling methods.
|
||
|
|
||
|
def __getitem__(self, name):
|
||
|
return self.find_control(name).value
|
||
|
def __setitem__(self, name, value):
|
||
|
control = self.find_control(name)
|
||
|
try:
|
||
|
control.value = value
|
||
|
except AttributeError, e:
|
||
|
raise ValueError(str(e))
|
||
|
|
||
|
def get_value(self,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Return value of control.
|
||
|
|
||
|
If only name and value arguments are supplied, equivalent to
|
||
|
|
||
|
form[name]
|
||
|
|
||
|
"""
|
||
|
c = self.find_control(name, type, kind, id, nr=nr)
|
||
|
if by_label:
|
||
|
try:
|
||
|
meth = c.get_value_by_label
|
||
|
except AttributeError:
|
||
|
raise NotImplementedError(
|
||
|
"control '%s' does not yet support by_label" % c.name)
|
||
|
else:
|
||
|
return meth()
|
||
|
else:
|
||
|
return c.value
|
||
|
def set_value(self, value,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Set value of control.
|
||
|
|
||
|
If only name and value arguments are supplied, equivalent to
|
||
|
|
||
|
form[name] = value
|
||
|
|
||
|
"""
|
||
|
c = self.find_control(name, type, kind, id, nr=nr)
|
||
|
if by_label:
|
||
|
try:
|
||
|
meth = c.set_value_by_label
|
||
|
except AttributeError:
|
||
|
raise NotImplementedError(
|
||
|
"control '%s' does not yet support by_label" % c.name)
|
||
|
else:
|
||
|
meth(value)
|
||
|
else:
|
||
|
c.value = value
|
||
|
|
||
|
def set_all_readonly(self, readonly):
|
||
|
for control in self.controls:
|
||
|
control.readonly = bool(readonly)
|
||
|
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Form-filling methods applying only to ListControls.
|
||
|
|
||
|
def possible_items(self,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Return a list of all values that the specified control can take."""
|
||
|
c = self._find_list_control(name, type, kind, id, nr)
|
||
|
return c.possible_items(by_label)
|
||
|
|
||
|
def set(self, selected, item_name,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Select / deselect named list item.
|
||
|
|
||
|
selected: boolean selected state
|
||
|
|
||
|
"""
|
||
|
self._find_list_control(name, type, kind, id, nr).set(
|
||
|
selected, item_name, by_label)
|
||
|
def toggle(self, item_name,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Toggle selected state of named list item."""
|
||
|
self._find_list_control(name, type, kind, id, nr).toggle(
|
||
|
item_name, by_label)
|
||
|
|
||
|
def set_single(self, selected,
|
||
|
name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Select / deselect list item in a control having only one item.
|
||
|
|
||
|
If the control has multiple list items, ItemCountError is raised.
|
||
|
|
||
|
This is just a convenience method, so you don't need to know the item's
|
||
|
name -- the item name in these single-item controls is usually
|
||
|
something meaningless like "1" or "on".
|
||
|
|
||
|
For example, if a checkbox has a single item named "on", the following
|
||
|
two calls are equivalent:
|
||
|
|
||
|
control.toggle("on")
|
||
|
control.toggle_single()
|
||
|
|
||
|
"""
|
||
|
self._find_list_control(name, type, kind, id, nr).set_single(
|
||
|
selected, by_label)
|
||
|
def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
|
||
|
by_label=False):
|
||
|
"""Toggle selected state of list item in control having only one item.
|
||
|
|
||
|
The rest is as for HTMLForm.set_single.__doc__.
|
||
|
|
||
|
"""
|
||
|
self._find_list_control(name, type, kind, id, nr).toggle_single(
|
||
|
by_label)
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Form-filling method applying only to FileControls.
|
||
|
|
||
|
def add_file(self, file_object, content_type=None, filename=None,
|
||
|
name=None, id=None, nr=None):
|
||
|
"""Add a file to be uploaded.
|
||
|
|
||
|
file_object: file-like object (with read method) from which to read
|
||
|
data to upload
|
||
|
content_type: MIME content type of data to upload
|
||
|
filename: filename to pass to server
|
||
|
|
||
|
If filename is None, no filename is sent to the server.
|
||
|
|
||
|
If content_type is None, the content type is guessed based on the
|
||
|
filename and the data from read from the file object.
|
||
|
|
||
|
XXX
|
||
|
At the moment, guessed content type is always application/octet-stream.
|
||
|
Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
|
||
|
plain text.
|
||
|
|
||
|
"""
|
||
|
self.find_control(name, "file", id=id, nr=nr).add_file(
|
||
|
file_object, content_type, filename)
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Form submission methods, applying only to clickable controls.
|
||
|
|
||
|
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
|
||
|
"""Return request that would result from clicking on a control.
|
||
|
|
||
|
The request object is a urllib2.Request instance, which you can pass to
|
||
|
urllib2.urlopen (or ClientCookie.urlopen).
|
||
|
|
||
|
Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
|
||
|
IMAGEs) can be clicked.
|
||
|
|
||
|
Will click on the first clickable control, subject to the name, type
|
||
|
and nr arguments (as for find_control). If no name, type, id or number
|
||
|
is specified and there are no clickable controls, a request will be
|
||
|
returned for the form in its current, un-clicked, state.
|
||
|
|
||
|
IndexError is raised if any of name, type, id or nr is specified but no
|
||
|
matching control is found. ValueError is raised if the HTMLForm has an
|
||
|
enctype attribute that is not recognised.
|
||
|
|
||
|
You can optionally specify a coordinate to click at, which only makes a
|
||
|
difference if you clicked on an image.
|
||
|
|
||
|
"""
|
||
|
return self._click(name, type, id, nr, coord, "request")
|
||
|
|
||
|
def click_request_data(self,
|
||
|
name=None, type=None, id=None, nr=0, coord=(1,1)):
|
||
|
"""As for click method, but return a tuple (url, data, headers).
|
||
|
|
||
|
You can use this data to send a request to the server. This is useful
|
||
|
if you're using httplib or urllib rather than urllib2. Otherwise, use
|
||
|
the click method.
|
||
|
|
||
|
# Untested. Have to subclass to add headers, I think -- so use urllib2
|
||
|
# instead!
|
||
|
import urllib
|
||
|
url, data, hdrs = form.click_request_data()
|
||
|
r = urllib.urlopen(url, data)
|
||
|
|
||
|
# Untested. I don't know of any reason to use httplib -- you can get
|
||
|
# just as much control with urllib2.
|
||
|
import httplib, urlparse
|
||
|
url, data, hdrs = form.click_request_data()
|
||
|
tup = urlparse(url)
|
||
|
host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
|
||
|
conn = httplib.HTTPConnection(host)
|
||
|
if data:
|
||
|
httplib.request("POST", path, data, hdrs)
|
||
|
else:
|
||
|
httplib.request("GET", path, headers=hdrs)
|
||
|
r = conn.getresponse()
|
||
|
|
||
|
"""
|
||
|
return self._click(name, type, id, nr, coord, "request_data")
|
||
|
|
||
|
def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
|
||
|
"""As for click_request_data, but returns a list of (key, value) pairs.
|
||
|
|
||
|
You can use this list as an argument to ClientForm.urlencode. This is
|
||
|
usually only useful if you're using httplib or urllib rather than
|
||
|
urllib2 or ClientCookie. It may also be useful if you want to manually
|
||
|
tweak the keys and/or values, but this should not be necessary.
|
||
|
Otherwise, use the click method.
|
||
|
|
||
|
Note that this method is only useful for forms of MIME type
|
||
|
x-www-form-urlencoded. In particular, it does not return the
|
||
|
information required for file upload. If you need file upload and are
|
||
|
not using urllib2, use click_request_data.
|
||
|
|
||
|
Also note that Python 2.0's urllib.urlencode is slightly broken: it
|
||
|
only accepts a mapping, not a sequence of pairs, as an argument. This
|
||
|
messes up any ordering in the argument. Use ClientForm.urlencode
|
||
|
instead.
|
||
|
|
||
|
"""
|
||
|
return self._click(name, type, id, nr, coord, "pairs")
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
|
||
|
def find_control(self,
|
||
|
name=None, type=None, kind=None, id=None, predicate=None,
|
||
|
nr=None):
|
||
|
"""Locate some specific control within the form.
|
||
|
|
||
|
At least one of the name, type, kind, predicate and nr arguments must
|
||
|
be supplied. If no matching control is found, ControlNotFoundError is
|
||
|
raised.
|
||
|
|
||
|
If name is specified, then the control must have the indicated name.
|
||
|
|
||
|
If type is specified then the control must have the specified type (in
|
||
|
addition to the types possible for <input> HTML tags: "text",
|
||
|
"password", "hidden", "submit", "image", "button", "radio", "checkbox",
|
||
|
"file" we also have "reset", "buttonbutton", "submitbutton",
|
||
|
"resetbutton", "textarea", "select" and "isindex").
|
||
|
|
||
|
If kind is specified, then the control must fall into the specified
|
||
|
group, each of which satisfies a particular interface. The types are
|
||
|
"text", "list", "multilist", "singlelist", "clickable" and "file".
|
||
|
|
||
|
If id is specified, then the control must have the indicated id.
|
||
|
|
||
|
If predicate is specified, then the control must match that function.
|
||
|
The predicate function is passed the control as its single argument,
|
||
|
and should return a boolean value indicating whether the control
|
||
|
matched.
|
||
|
|
||
|
nr, if supplied, is the sequence number of the control (where 0 is the
|
||
|
first). Note that control 0 is the first control matching all the
|
||
|
other arguments (if supplied); it is not necessarily the first control
|
||
|
in the form.
|
||
|
|
||
|
"""
|
||
|
if ((name is None) and (type is None) and (kind is None) and
|
||
|
(id is None) and (predicate is None) and (nr is None)):
|
||
|
raise ValueError(
|
||
|
"at least one argument must be supplied to specify control")
|
||
|
if nr is None: nr = 0
|
||
|
|
||
|
return self._find_control(name, type, kind, id, predicate, nr)
|
||
|
|
||
|
#---------------------------------------------------
|
||
|
# Private methods.
|
||
|
|
||
|
def _find_list_control(self,
|
||
|
name=None, type=None, kind=None, id=None, nr=None):
|
||
|
if ((name is None) and (type is None) and (kind is None) and
|
||
|
(id is None) and (nr is None)):
|
||
|
raise ValueError(
|
||
|
"at least one argument must be supplied to specify control")
|
||
|
if nr is None: nr = 0
|
||
|
|
||
|
return self._find_control(name, type, kind, id, is_listcontrol, nr)
|
||
|
|
||
|
def _find_control(self, name, type, kind, id, predicate, nr):
|
||
|
if (name is not None) and not isstringlike(name):
|
||
|
raise TypeError("control name must be string-like")
|
||
|
if (type is not None) and not isstringlike(type):
|
||
|
raise TypeError("control type must be string-like")
|
||
|
if (kind is not None) and not isstringlike(kind):
|
||
|
raise TypeError("control kind must be string-like")
|
||
|
if (id is not None) and not isstringlike(id):
|
||
|
raise TypeError("control id must be string-like")
|
||
|
if (predicate is not None) and not callable(predicate):
|
||
|
raise TypeError("control predicate must be callable")
|
||
|
if nr < 0: raise ValueError("control number must be a positive "
|
||
|
"integer")
|
||
|
|
||
|
orig_nr = nr
|
||
|
|
||
|
for control in self.controls:
|
||
|
if name is not None and name != control.name:
|
||
|
continue
|
||
|
if type is not None and type != control.type:
|
||
|
continue
|
||
|
if (kind is not None and
|
||
|
not self._is_control_in_kind(control, kind)):
|
||
|
continue
|
||
|
if id is not None and id != control.id:
|
||
|
continue
|
||
|
if predicate and not predicate(control):
|
||
|
continue
|
||
|
if nr:
|
||
|
nr = nr - 1
|
||
|
continue
|
||
|
return control
|
||
|
|
||
|
description = []
|
||
|
if name is not None: description.append("name '%s'" % name)
|
||
|
if type is not None: description.append("type '%s'" % type)
|
||
|
if kind is not None: description.append("kind '%s'" % kind)
|
||
|
if id is not None: description.append("id '%s'" % id)
|
||
|
if predicate is not None:
|
||
|
description.append("matching predicate %s" % predicate)
|
||
|
if orig_nr: description.append("nr %d" % orig_nr)
|
||
|
description = string.join(description, ", ")
|
||
|
raise ControlNotFoundError("no control with "+description)
|
||
|
|
||
|
def _is_control_in_kind(self, control, kind):
|
||
|
# XXX not OO
|
||
|
if kind == "list":
|
||
|
return isinstance(control, ListControl)
|
||
|
elif kind == "multilist":
|
||
|
return bool(isinstance(control, ListControl) and control.multiple)
|
||
|
elif kind == "singlelist":
|
||
|
return bool(isinstance(control, ListControl) and
|
||
|
not control.multiple)
|
||
|
elif kind == "file":
|
||
|
return isinstance(control, FileControl)
|
||
|
elif kind == "text":
|
||
|
return isinstance(control, TextControl)
|
||
|
elif kind == "clickable":
|
||
|
return (isinstance(control, SubmitControl) or
|
||
|
isinstance(control, IsindexControl))
|
||
|
else:
|
||
|
raise ValueError("no such control kind '%s'" % kind)
|
||
|
|
||
|
def _click(self, name, type, id, nr, coord, return_type):
|
||
|
try:
|
||
|
control = self._find_control(name, type, "clickable", id, None, nr)
|
||
|
except ControlNotFoundError:
|
||
|
if ((name is not None) or (type is not None) or (id is not None) or
|
||
|
(nr != 0)):
|
||
|
raise
|
||
|
# no clickable controls, but no control was explicitly requested,
|
||
|
# so return state without clicking any control
|
||
|
return self._switch_click(return_type)
|
||
|
else:
|
||
|
return control._click(self, coord, return_type)
|
||
|
|
||
|
def _pairs(self):
|
||
|
"""Return sequence of (key, value) pairs suitable for urlencoding."""
|
||
|
pairs = []
|
||
|
for control in self.controls:
|
||
|
pairs.extend(control.pairs())
|
||
|
return pairs
|
||
|
|
||
|
def _request_data(self):
    """Return a tuple (url, data, headers).

    The form's method is compared case-insensitively.

    GET: only the "application/x-www-form-urlencoded" enctype is accepted.
    The urlencoded pairs are appended to the action after a "?", data is
    None and headers is an empty list.

    POST: for "application/x-www-form-urlencoded" the urlencoded pairs
    are the data and a single Content-type header is returned.  For
    "multipart/form-data" each control writes itself through a MimeWriter
    and the headers are those collected by the writer.

    ValueError is raised for any other method or enctype.

    """
    urlencoded = "application/x-www-form-urlencoded"
    verb = string.upper(self.method)

    if verb == "GET":
        if self.enctype != urlencoded:
            raise ValueError(
                "unknown GET form encoding type '%s'" % self.enctype)
        query = urlencode(self._pairs())
        return "%s?%s" % (self.action, query), None, []

    if verb != "POST":
        raise ValueError("Unknown method '%s'" % verb)

    if self.enctype == urlencoded:
        return (self.action,
                urlencode(self._pairs()),
                [("Content-type", self.enctype)])

    if self.enctype != "multipart/form-data":
        raise ValueError(
            "unknown POST form encoding type '%s'" % self.enctype)

    body = StringIO()
    headers = []
    writer = MimeWriter(body, headers)
    # Start the multipart body; the returned file object is not needed
    # because every control writes through the writer itself.
    writer.startmultipartbody("form-data", add_to_http_hdrs=True,
                              prefix=0)
    for control in self.controls:
        control._write_mime_data(writer)
    writer.lastpart()
    return self.action, body.getvalue(), headers
|
||
|
|
||
|
def _switch_click(self, return_type):
|
||
|
# This is called by HTMLForm and clickable Controls to hide switching
|
||
|
# on return_type.
|
||
|
# XXX
|
||
|
# not OO
|
||
|
# duplicated in IsindexControl._click
|
||
|
if return_type == "pairs":
|
||
|
return self._pairs()
|
||
|
elif return_type == "request_data":
|
||
|
return self._request_data()
|
||
|
else:
|
||
|
req_data = self._request_data()
|
||
|
req = urllib2.Request(req_data[0], req_data[1])
|
||
|
for key, val in req_data[2]:
|
||
|
req.add_header(key, val)
|
||
|
return req
|