Chinaunix首页 | 论坛 | 博客
  • 博客访问: 19933424
  • 博文数量: 679
  • 博客积分: 10495
  • 博客等级: 上将
  • 技术积分: 9308
  • 用 户 组: 普通用户
  • 注册时间: 2006-07-18 10:51
文章分类

全部博文(679)

文章存档

2012年(5)

2011年(38)

2010年(86)

2009年(145)

2008年(170)

2007年(165)

2006年(89)

分类: Python/Ruby

2010-02-10 14:13:05

import re
import os.path

from robottypes import is_str, unic


_hr_re = re.compile('^-{3,} *$')
_bold_re = re.compile('''
(                         # prefix (group 1)
  (\A|\ )                 # begin of line or space
  ["'(]* _?               # optionally any char "'( and optional begin of italic
)                         #
\*                        # start of bold
([^\ ].*?)                # no space and then anything (group 3)
\*                        # end of bold
(?=                       # start of postfix (non-capturing group)
  _? ["').,!?:;]*         # optional end of italic and any char "').,!?:;
  (\Z|\ )                 # end of line or space
)
''', re.VERBOSE)
_italic_re = re.compile('''
( (\A|\ ) ["'(]* )         # begin of line or space and opt. any char "'(
_                          # start of italic
([^\ _].*?)                # no space or underline and then anything
_                          # end of italic
(?= ["').,!?:;]* (\Z|\ ) ) # opt. any char "').,!?:; and end of line or space
''', re.VERBOSE)
_url_re = re.compile('''
( (\A|\ ) ["'([]* )         # begin of line or space and opt. any char "'([
(\w{3,9}://[\S]+?)          # url (protocol is any alphanum 3-9 long string)
(?= [])"'.,!?:;]* (\Z|\ ) ) # opt. any char ])"'.,!?:; and end of line or space
''', re.VERBOSE)


def html_escape(text, formatting=False):
    """Escape `text` for HTML and convert it line by line to HTML markup.

    `text` is converted with `unic` first if `is_str` reports it is not a
    string. The characters ``&``, ``<`` and ``>`` are replaced with the
    corresponding HTML entities, after which every line is passed through
    `_format_line`. The resulting pieces are joined with ``<br />``.

    When `formatting` is true, lines accepted by `_Table.is_table_row` are
    collected into an HTML table and lines matching `_hr_re` become
    horizontal rulers.

    NOTE(review): the entity and tag literals (``&amp;``, ``<hr />``,
    ``<br />``) were reconstructed; the listing this code was taken from had
    its HTML stripped, which left no-op replacements and broken literals.
    """
    if not is_str(text):
        text = unic(text)

    # '&' must be handled first so that the ampersands introduced by the
    # other two replacements are not escaped a second time.
    for name, value in [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]:
        text = text.replace(name, value)

    ret = []
    table = _Table()
    hr = None  # pending ruler markup, emitted together with what follows it

    for line in text.splitlines():
        if formatting and table.is_table_row(line):
            # A ruler seen just before the table is flushed as its own item.
            if hr:
                ret.append(hr)
                hr = None
            table.add_row(line)
        elif table.is_started():
            # First non-table line after table rows: close the table and
            # emit it together with this line.
            if _hr_re.match(line):
                hr = '<hr />\n'
                line = ''
            else:
                line = _format_line(line, True)
            ret.append(table.end() + line)
        elif formatting and _hr_re.match(line):
            hr = '<hr />\n'
        else:
            line = _format_line(line, formatting)
            if hr:
                # Prepend the pending ruler so no '<br />' separates them.
                line = hr + line
                hr = None
            ret.append(line)

    # Flush whatever is still pending at the end of the text.
    if table.is_started():
        ret.append(table.end())
    if hr:
        ret.append(hr)

    return '<br />\n'.join(ret)


def html_attr_escape(attr):
    """Return `attr` escaped so it can be used as an HTML attribute value.

    ``&``, ``"``, ``<`` and ``>`` are replaced with HTML entities, and each
    newline, carriage return and tab character is replaced with one space.
    """
    # '&' must come first; otherwise the ampersands of the entities added by
    # the later replacements would themselves be escaped.
    for name, value in [('&', '&amp;'), ('"', '&quot;'),
                        ('<', '&lt;'), ('>', '&gt;')]:
        attr = attr.replace(name, value)
    for wspace in ['\n', '\r', '\t']:
        attr = attr.replace(wspace, ' ')
    return attr


class _Table:

    _is_line = re.compile('^\s*\| (.* |)\|\s*$')
    _line_splitter = re.compile(' \|(?= )')

    def __init__(self):
        self._rows = []

    def is_table_row(self, row):
        return self._is_line.match(row) is not None

    def add_row(self, text):
        text = text.strip()[1:-1]   # remove outer whitespace and pipes
        cells = [ cell.strip() for cell in self._line_splitter.split(text) ]
        self._rows.append(cells)

    def end(self):
        ret = self._format(self._rows)
        self._rows = []
        return ret

    def is_started(self):
        return len(self._rows) > 0

    def _format(self, rows):
        maxlen = max([ len(row) for row in rows ])
        table = ['']
        for row in rows:
            row += [''] * (maxlen - len(row))  # fix ragged tables
            table.append('')
            table.extend([ '' % _format_line(cell, True)
                           for cell in row ])
            table.append('')
        table.append('
%s
\n')
        return '\n'.join(table)


def _format_line(line, formatting=False):
    """Return `line` with URLs converted to HTML and whitespace preserved.

    When `formatting` is true, ``*bold*`` and ``_italic_`` markers (as
    matched by `_bold_re` and `_italic_re`) are also replaced with ``<b>``
    and ``<i>`` elements. URL matches are rendered by `_repl_url`.

    NOTE(review): the ``<b>``/``<i>`` tags and ``&nbsp;`` entities were
    reconstructed; the listing this code was taken from had them stripped.
    """
    if formatting:
        # Group 1 is the prefix before the marker, group 3 the marked text.
        line = _bold_re.sub(r'\1<b>\3</b>', line)
        line = _italic_re.sub(r'\1<i>\3</i>', line)
    line = _url_re.sub(lambda res: _repl_url(res, formatting), line)
    # Replace a tab with eight "hard" spaces, and two "soft" spaces with one
    # "hard" and one "soft" space (preserves spaces but allows wrapping)
    return line.replace('\t', '&nbsp;' * 8).replace('  ', ' &nbsp;')


def _repl_url(res, formatting):
    pre = res.group(1)
    url = res.group(3).replace('"', '"')
    if formatting and os.path.splitext(url)[1].lower() \
           in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
        return '%s' % (pre, url, url)
    return '%s%s' % (pre, url, url)

文件路径:robotframework-2.1.2\src\robot\utils\htmlutils.py
功能:HTML的处理,暂不涉及
 
阅读(36295) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~