Source code for surly.re_parse
import sre_parse
import re
class ReverseParseError(Exception):
    ''' Raised when a regular expression contains a construct that
    cannot be turned back into a URL template.
    '''
class PythonReverser(object):
    ''' Python format string reverser. Generates a python format
    string which will produce a URL.'''

    def __init__(self):
        # The format string accumulated so far.
        self.s = ''

    def add_literal(self, char):
        ''' Append a literal character to the format string.

        Braces are doubled so that ``str.format`` reproduces them verbatim
        instead of treating them as the start or end of a replacement field.

        :param char: the literal character to append
        :type char: string
        '''
        if char in ('{', '}'):
            char = char * 2
        self.s += char

    def add_named_group(self, name):
        ''' Append a replacement field for a named capture group to the
        format string (e.g. {name})

        :param name: the name of the capture group
        :type name: string
        '''
        self.s += '{%s}' % name

    def value(self):
        ''' Returns the format string built up over the course of parsing

        :rtype: string
        '''
        return self.s
class JavascriptReverser(object):
    ''' Super-quick javascript reverser. Generates an anonymous function
    which can take an object as a parameter and produce a URL. Since URLs
    are not user generated, there is no effort to stop cross-site scripting
    '''

    # Characters that cannot appear raw inside a double-quoted JS string
    # literal, mapped to their escaped spelling.
    _JS_ESCAPES = {
        '\\': '\\\\',
        '\n': '\\n',
        '\r': '\\r',
    }

    def __init__(self):
        # The chain of "+..." terms appended after the leading "" in value().
        self.s = ''

    def add_literal(self, char):
        ''' Append a literal character to the JS expression. The function
        handles single and double quotes, and escapes backslashes and
        line breaks so the generated string literal stays valid.

        :param char: the literal character to append
        :type char: string
        '''
        if char == '"':
            # A double quote is wrapped in single quotes instead.
            self.s += "+'%s'" % char
        else:
            self.s += '+"%s"' % self._JS_ESCAPES.get(char, char)

    def add_named_group(self, name):
        ''' Add a named group to the JS expression, having it append
        fields["name"]

        :param name: the name of the capture group
        :type name: string
        '''
        self.s += '+fields["%s"]' % name

    def value(self):
        ''' Returns the anonymous JS function built up over the course of
        parsing

        :rtype: string
        '''
        return '''function(fields){return ""%s;}''' % self.s
def _recursive_parse(ast, group_index_map, *reversers):
    '''
    Walk a parsed regular expression and replay it into each reverser.

    This relies heavily on the implementation of the standard
    library module sre_parse, particularly the functions parse and _parse.
    Regular expressions with named capture groups are supported.
    Non-capturing groups are also supported; their contents are recursively
    parsed.
    Any non-literal expressions occurring outside of a named capture group
    are not supported.
    No attempt is made to make this code fast or safe.

    :param ast: the parsed pattern, an iterable of ``(opcode, argument)``
        pairs as produced by ``sre_parse.parse``
    :param group_index_map: dictionary mapping a capture group index to
        the name of that group
    :param reversers: objects providing ``add_literal(char)`` and
        ``add_named_group(name)``
    :raises ReverseParseError: if an unsupported expression is found
    :raises KeyError: if a capture group has no entry in
        ``group_index_map`` (i.e. it is not a named group)
    '''
    try:
        to_char = unichr  # Python 2: chr() only covers 0-255
    except NameError:
        to_char = chr  # Python 3: chr() covers all of Unicode

    def opcode_name(code):
        # Opcodes are plain lowercase strings on Python 2 and named
        # constants (with a ``name`` such as 'LITERAL') on Python 3;
        # reduce both to the lowercase spelling.
        return str(getattr(code, 'name', code)).lower()

    for item in ast:
        item_type = opcode_name(item[0])
        if item_type == 'literal':
            # This is a matched literal; keep it
            for r in reversers:
                r.add_literal(to_char(item[1]))
        elif item_type == 'subpattern':
            subpattern = item[1]
            subpattern_index = subpattern[0]
            if subpattern_index is None:
                # This is a non-capturing group. Parse it out recursively.
                # The contents are the last element: the tuple is
                # (group, pattern) on older Pythons and
                # (group, add_flags, del_flags, pattern) on newer ones.
                _recursive_parse(subpattern[-1], group_index_map, *reversers)
            else:
                # Otherwise, we've found a capture group. Fill in our value.
                for r in reversers:
                    r.add_named_group(group_index_map[subpattern_index])
        elif (item_type == 'at'
                and opcode_name(item[1]) in ('at_end', 'at_beginning')):
            # Anchors (^ and $) contribute nothing to the output.
            pass
        else:
            raise ReverseParseError('Unsupported regex expression: %s'
                                    % item_type)
def reverse_template(re_str):
    ''' Build the python format string equivalent of a regular expression.

    :param re_str: the regular expression
    :type re_str: string
    :returns: the format string produced by a ``PythonReverser``
    '''
    template_builder = PythonReverser()
    _reverse_template(re_str, template_builder)
    return template_builder.value()
def reverse_template_js(re_str):
    ''' Build an anonymous JS function, equivalent to a regular
    expression, which can be used to generate URLs.

    :param re_str: the regular expression
    :type re_str: string
    :returns: the JS source produced by a ``JavascriptReverser``
    '''
    js_builder = JavascriptReverser()
    _reverse_template(re_str, js_builder)
    return js_builder.value()
def _reverse_template(re_str, reverser):
    '''
    `_reverse_template` parses the regex ``re_str`` and feeds its literals \
    and named capture groups to ``reverser``.

    :param re_str: A Python string (raw strings are recommended) \
    representing the regular expression to be reversed.
    :param reverser: An object providing ``add_literal`` and \
    ``add_named_group``; it is filled in place and nothing is returned.
    '''
    compiled = re.compile(re_str)
    ast = sre_parse.parse(re_str)
    # groupindex maps name -> index; the parser needs index -> name.
    group_index_map = dict((index, group)
                           for (group, index) in compiled.groupindex.items())
    _recursive_parse(ast, group_index_map, reverser)
def reverse(re_str, kwargs):
    '''
    `reverse` interpolates `kwargs` values into their respective named \
    capture groups within `re_str`.

    :param re_str: A Python string (raw strings are recommended) \
    representing the regular expression to be reversed.
    :param kwargs: A dictionary mapping capture group names to the \
    value that should be interpolated into the capture expression.
    :returns: The format string for `re_str` with `kwargs` filled in.
    '''
    # reverse_template runs the same compile/parse/walk pipeline this
    # function used to repeat inline.
    return reverse_template(re_str).format(**kwargs)
def reverse_group_map(re_str):
    '''
    `reverse_group_map` maps each named capture group's index to its name.

    :param re_str: A Python string (raw strings are recommended) \
    representing the regular expression to inspect.
    :returns: A dictionary of ``{group index: group name}``; unnamed \
    groups do not appear in it.
    '''
    compiled = re.compile(re_str)
    # groupindex maps name -> index; invert it.
    return dict((index, group)
                for (group, index) in compiled.groupindex.items())