| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | SPEED_TEST = 0 |
|---|
| 4 | |
|---|
| 5 | """ |
|---|
| 6 | ==================================================================== |
|---|
| 7 | IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION |
|---|
| 8 | ==================================================================== |
|---|
| 9 | |
|---|
| 10 | Python-Markdown |
|---|
| 11 | =============== |
|---|
| 12 | |
|---|
| 13 | Converts Markdown to HTML. Basic usage as a module: |
|---|
| 14 | |
|---|
| 15 | import markdown |
|---|
| 16 | html = markdown.markdown(your_text_string) |
|---|
| 17 | |
|---|
| 18 | Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and |
|---|
| 19 | maintained by [Yuri Takhteyev](http://www.freewisdom.org). |
|---|
| 20 | |
|---|
| 21 | Project website: http://www.freewisdom.org/projects/python-markdown |
|---|
| 22 | Contact: yuri [at] freewisdom.org |
|---|
| 23 | |
|---|
| 24 | License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD |
|---|
| 25 | |
|---|
| 26 | Version: 1.5 (May 15, 2006) |
|---|
| 27 | |
|---|
| 28 | For changelog, see end of file |
|---|
| 29 | """ |
|---|
| 30 | |
|---|
| 31 | import re, sys, os, random |
|---|
| 32 | |
|---|
# Message levels, from chattiest to quietest:
# 0 all (VERBOSE), 1 informative (INFO), 2 critical (CRITICAL), 3 none (NONE)
(VERBOSE, INFO, CRITICAL, NONE) = range(4)

MESSAGE_THRESHOLD = CRITICAL


def message(level, text):
    """Print text if level is at or above MESSAGE_THRESHOLD.

    @param level: one of VERBOSE, INFO, CRITICAL, NONE
    @param text: the message to print
    """
    if level < MESSAGE_THRESHOLD:
        return
    print(text)
|---|
| 41 | |
|---|
| 42 | |
|---|
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
# when true, Element.toxml() and ImagePattern turn {@name=value} markers
# found in text nodes into attributes of the enclosing element
ENABLE_ATTRIBUTES = 1
# when true, _emphasis_ only opens at an underscore that is not preceded by
# a non-whitespace character (selects the stricter EMPHASIS_2_RE)
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# NOTE(review): not referenced in the part of the file reviewed here;
# presumably a marker used by the footnote code -- confirm
FN_BACKLINK_TEXT = "zz1337820767766393qq"
# a template for html placeholders; HtmlStash.store() fills in the counter
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
|---|
| 56 | |
|---|
BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/']


def is_block_level(tag):
    """Tell whether tag names a block-level HTML element.

    A tag is block-level when it is listed in BLOCK_LEVEL_ELEMENTS or when
    it starts with "h" followed by a digit (h1, h2, ...).

    @param tag: a lower-cased tag name without angle brackets
    @returns: True or False
    """
    if tag in BLOCK_LEVEL_ELEMENTS:
        return True
    # The length guard keeps one-character and empty names (e.g. "h", "")
    # from raising IndexError on tag[1] / tag[0].
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in "0123456789"
|---|
| 65 | |
|---|
| 66 | """ |
|---|
| 67 | ====================================================================== |
|---|
| 68 | ========================== NANODOM =================================== |
|---|
| 69 | ====================================================================== |
|---|
| 70 | |
|---|
| 71 | The three classes below implement some of the most basic DOM |
|---|
| 72 | methods. I use this instead of minidom because I need a simpler |
|---|
| 73 | functionality and do not want to require additional libraries. |
|---|
| 74 | |
|---|
| 75 | Importantly, NanoDom does not do normalization, which is what we |
|---|
| 76 | want. It also adds extra white space when converting DOM to string |
|---|
| 77 | """ |
|---|
| 78 | |
|---|
| 79 | |
|---|
class Document:
    """Root of a NanoDom tree and factory for its nodes.

    Nodes created through the create* methods get a "doc" back-reference
    to this document (entity references excepted, which are shared).
    """

    def __init__(self):
        # Initialised here so that createEntityReference() and unlink()
        # are usable before a root element has been attached.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child):
        """Make child the root element of the document."""
        self.documentElement = child
        child.parent = self
        # Kept from the original: attaching a root starts a fresh
        # entity-reference cache.
        self.entities = {}

    def createElement(self, tag, textNode=None):
        """Create an Element owned by this document.

        @param tag: the element name
        @param textNode: optional text; when non-empty it is wrapped in a
                         TextNode and appended as the first child
        @returns: the new Element
        """
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        """Create a TextNode owned by this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the (cached) EntityReference for entity, e.g. "#64"."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml(self):
        """Serialise the tree starting at the root element."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text):
        """Escape <, > and double quotes in text as HTML entities.

        "&" is not escaped here: that pair was disabled in the original
        code, so ampersands pass through unchanged.
        """
        pairs = [("<", "&lt;"),
                 (">", "&gt;"),
                 ("\"", "&quot;")]

        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def find(self, test):
        """Return the descendants of the root element that pass test."""
        return self.documentElement.find(test)

    def unlink(self):
        """Detach the tree from the document."""
        if self.documentElement is not None:
            self.documentElement.unlink()
        self.documentElement = None
|---|
| 124 | |
|---|
| 125 | |
|---|
class Element:
    """A NanoDom element: a tag name, ordered attributes and child nodes."""

    type = "element"

    def __init__(self, tag):
        self.nodeName = tag
        self.attributes = []        # attribute names, in insertion order
        self.attribute_values = {}  # attribute name -> value
        self.childNodes = []

    def unlink(self):
        """Recursively drop the child lists of this subtree."""
        for node in self.childNodes:
            if node.type == "element":
                node.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        """Set attr to value, remembering the order attributes were added."""
        if attr not in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        """Insert child at the given index and adopt it."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        """Remove child from the list of children."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        """Put newChild at the position currently held by oldChild."""
        where = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(where, newChild)

    def appendChild(self, child):
        """Add child after the existing children and adopt it."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        """Elements carry no inline attribute markers; nothing to do."""
        pass

    def find(self, test, depth=0):
        """ Returns a list of descendants that pass the test function """
        found = []
        for node in self.childNodes:
            if test(node):
                found.append(node)
            if node.type == "element":
                found.extend(node.find(test, depth + 1))
        return found

    def toxml(self):
        """Serialise this element and its subtree to an HTML string."""
        if ENABLE_ATTRIBUTES:
            # Let text children move their {@name=value} markers onto
            # this element before the attributes are written out.
            for node in self.childNodes:
                node.handleAttributes()

        name = self.nodeName
        # NOTE(review): the whitespace inside the literals below is
        # reproduced exactly as it appears in the reviewed text; confirm
        # the indent widths against the pristine file.
        pieces = []
        if name in ['h1', 'h2', 'h3', 'h4']:
            pieces.append("\n")
        elif name in ['li']:
            pieces.append("\n ")
        pieces.append("<" + name)
        for attr in self.attributes:
            value = self.doc.normalizeEntities(self.attribute_values[attr])
            pieces.append(' %s="%s"' % (attr, value))

        if self.childNodes or name in ['blockquote']:
            pieces.append(">")
            for node in self.childNodes:
                pieces.append(node.toxml())
            if name == 'p':
                pieces.append("\n")
            elif name == 'li':
                pieces.append("\n ")
            pieces.append("</%s>" % name)
        else:
            # childless elements (other than blockquote) are self-closed
            pieces.append("/>")

        if name in ['p', 'li', 'ul', 'ol',
                    'h1', 'h2', 'h3', 'h4']:
            pieces.append("\n")

        return "".join(pieces)
|---|
| 208 | |
|---|
| 209 | |
|---|
class TextNode:
    """A NanoDom text node holding a piece of character data."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')  # {@id=123}

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        """Copy one {@name=value} match onto the parent element."""
        name, value = match.group(1, 2)
        self.parent.setAttribute(name, value)

    def handleAttributes(self):
        """Strip {@name=value} markers from the text, applying each one to
        the parent element via attributeCallback."""
        stripped = self.attrRegExp.sub(self.attributeCallback, self.value)
        self.value = stripped

    def toxml(self):
        """Return the text, indented for its parent and entity-escaped."""
        text = self.value
        is_placeholder = text.startswith(HTML_PLACEHOLDER_PREFIX)
        if not is_placeholder:
            owner = self.parent
            # NOTE(review): indent literals reproduced exactly as they
            # appear in the reviewed text; confirm the widths.
            if owner.nodeName == "p":
                text = text.replace("\n", "\n ")
            elif owner.nodeName == "li" and owner.childNodes[0] == self:
                text = "\n " + text.replace("\n", "\n ")
        return self.doc.normalizeEntities(text)
|---|
| 234 | |
|---|
| 235 | |
|---|
class EntityReference:
    """A NanoDom node that serialises as an entity reference (&name;)."""

    type = "entity_ref"

    def __init__(self, entity):
        # entity is the text between "&" and ";", e.g. "amp" or "#64"
        self.entity = entity

    def handleAttributes(self):
        """Entity references carry no attribute markers; nothing to do."""
        pass

    def toxml(self):
        """Return the reference in its "&...;" form."""
        return "".join(["&", self.entity, ";"])
|---|
| 248 | |
|---|
| 249 | |
|---|
| 250 | """ |
|---|
| 251 | ====================================================================== |
|---|
| 252 | ========================== PRE-PROCESSORS ============================ |
|---|
| 253 | ====================================================================== |
|---|
| 254 | |
|---|
| 255 | Preprocessors munge source text before we start doing anything too |
|---|
| 256 | complicated. |
|---|
| 257 | |
|---|
| 258 | Each preprocessor implements a "run" method that takes a pointer to |
|---|
| 259 | a list of lines of the document, modifies it as necessary and |
|---|
| 260 | returns either the same pointer or a pointer to a new list. |
|---|
| 261 | """ |
|---|
| 262 | |
|---|
class HeaderPreprocessor:

    """
    Replaces underlined headers with hashed headers to avoid
    the need for lookahead later.
    """

    def run(self, lines):
        """Rewrite underlined headers in place and return the list.

        A line followed by a run of "=" becomes "# line"; one followed by
        a run of "-" becomes "## line"; the underline itself is blanked.
        A "\\n" entry is inserted after every line that already starts
        with "#".

        @param lines: list of source lines (modified in place)
        @returns: the same list
        """
        i = 0
        # len(lines) is re-read on every pass because lines are inserted
        # while walking; a fixed range() would stop short of the tail.
        while i < len(lines):
            line = lines[i]

            # Blank and whitespace-only lines (including the "\n" entries
            # inserted below) are never header text.
            if not line.strip():
                i += 1
                continue

            if line.startswith("#"):
                lines.insert(i + 1, "\n")

            # Strictly less than: the last line has no underline to look at.
            elif i + 1 < len(lines) and lines[i + 1][:1] in ('-', '='):
                underline = lines[i + 1].strip()

                if underline == "=" * len(underline):
                    lines[i] = "# " + line.strip()
                    lines[i + 1] = ""
                elif underline == "-" * len(underline):
                    lines[i] = "## " + line.strip()
                    lines[i + 1] = ""

            i += 1

        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()
|---|
| 295 | |
|---|
class LinePreprocessor:
    """Deals with HR lines (needs to be done before processing lists)"""

    def run(self, lines):
        """Replace every horizontal-rule line with "<hr />", in place.

        @param lines: list of source lines (modified in place)
        @returns: the same list
        """
        for i, line in enumerate(lines):
            if self._isLine(line):
                lines[i] = "<hr />"
        return lines

    def _isLine(self, block):
        """Determines if a block should be replaced with an <HR>

        @returns: 1 if block is a horizontal rule, 0 otherwise
        """
        # Only a full indent makes a code block; testing for a single
        # leading space would reject rules indented by one to three spaces.
        if block.startswith(" " * TAB_LENGTH):
            return 0
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2:
            return 0
        for pattern in ['isline1', 'isline2', 'isline3']:
            m = RE.regExp[pattern].match(text)
            if m and m.group(1):
                return 1
        return 0


LINE_PREPROCESSOR = LinePreprocessor()
|---|
| 319 | |
|---|
| 320 | |
|---|
class LineBreaksPreprocessor:
    """Appends <br /> to lines that end with a double space."""

    def run(self, lines):
        """Mark hard line breaks, in place.

        Lines matching the 'tabbed' pattern are left alone.

        @param lines: list of source lines (modified in place)
        @returns: the same list
        """
        for i, line in enumerate(lines):
            # Two trailing spaces, as the class contract states; a single
            # trailing space is ordinary text and must not force a break.
            if (line.endswith("  ")
                    and not RE.regExp['tabbed'].match(line)):
                lines[i] += "<br />"
        return lines


LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
|---|
| 332 | |
|---|
| 333 | |
|---|
class HtmlBlockPreprocessor:
    """Moves raw HTML blocks out of the text, leaving placeholders.

    The "stash" attribute (an object with a store(html) method returning
    a placeholder string) must be assigned before run() is called.
    """

    def run(self, lines):
        """Replace each blank-line-separated HTML block by a placeholder.

        @param lines: list of source lines
        @returns: a new list of lines
        """
        new_blocks = []
        text = "\n".join(lines)
        for block in text.split("\n\n"):
            if block.startswith("\n"):
                block = block[1:]
            if self._isHtmlBlock(block):
                new_blocks.append(self.stash.store(block.strip()))
            else:
                new_blocks.append(block)
        return "\n\n".join(new_blocks).split("\n")

    def _isHtmlBlock(self, block):
        """Tell whether block is one raw HTML block: it is wrapped in
        <...> and opens with a block-level tag or a !, ?, @, % marker."""
        if not (block.startswith("<") and block.rstrip().endswith(">")):
            return False
        if block[1] in ["!", "?", "@", "%"]:
            return True
        words = block[1:].replace(">", " ").split()
        # "<>" or "< >" has no tag name; without this check words[0]
        # raised IndexError.
        if not words:
            return False
        return is_block_level(words[0].lower())


HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
|---|
| 354 | |
|---|
| 355 | |
|---|
class ReferencePreprocessor:
    """Collects reference definitions and removes them from the text.

    The "references" dictionary attribute must be assigned before run()
    is called; it is filled with id -> (url, title) entries.
    """

    def run(self, lines):
        """Return the lines that are not reference definitions.

        @param lines: list of source lines
        @returns: a new list without the definition lines
        """
        kept = []
        pattern = RE.regExp['reference-def'] if lines else None
        for line in lines:
            m = pattern.match(line)
            if not m:
                kept.append(line)
                continue
            # ids are matched case-insensitively
            ref_id = m.group(2).strip().lower()
            title = dequote(m.group(4).strip())
            self.references[ref_id] = (m.group(3), title)
        return kept


REFERENCE_PREPROCESSOR = ReferencePreprocessor()
|---|
| 371 | |
|---|
| 372 | """ |
|---|
| 373 | ====================================================================== |
|---|
| 374 | ========================== INLINE PATTERNS =========================== |
|---|
| 375 | ====================================================================== |
|---|
| 376 | |
|---|
| 377 | Inline patterns such as *emphasis* are handled by means of auxiliary |
|---|
| 378 | objects, one per pattern. Each pattern object uses a single regular |
|---|
| 379 | expression and needs to support the following methods: |
|---|
| 380 | |
|---|
| 381 | pattern.getCompiledRegExp() - returns a regular expression |
|---|
| 382 | |
|---|
| 383 | pattern.handleMatch(m, doc) - takes a match object and returns |
|---|
| 384 | a NanoDom node (as a part of the provided |
|---|
| 385 | doc) or None |
|---|
| 386 | |
|---|
| 387 | All of python markdown's built-in patterns subclass from BasePattern, |
|---|
| 388 | but you can add additional patterns that don't. |
|---|
| 389 | |
|---|
| 390 | Also note that all the regular expressions used by inline must |
|---|
| 391 | capture the whole block. For this reason, they all start with |
|---|
| 392 | '^(.*)' and end with '(.*)$'. In case with built-in expression |
|---|
| 393 | BasePattern takes care of adding the "^(.*)" and "(.*)$". |
|---|
| 394 | |
|---|
| 395 | Finally, the order in which regular expressions are applied is very |
|---|
| 396 | important - e.g. if we first replace http://.../ links with <a> tags |
|---|
| 397 | and _then_ try to replace inline html, we would end up with a mess. |
|---|
| 398 | So, we apply the expressions in the following order: |
|---|
| 399 | |
|---|
| 400 | * escape and backticks have to go before everything else, so |
|---|
| 401 | that we can preempt any markdown patterns by escaping them. |
|---|
| 402 | |
|---|
| 403 | * then we handle auto-links (must be done before inline html) |
|---|
| 404 | |
|---|
| 405 | * then we handle inline HTML. At this point we will simply |
|---|
| 406 | replace all inline HTML strings with a placeholder and add |
|---|
| 407 | the actual HTML to a hash. |
|---|
| 408 | |
|---|
| 409 | * then inline images (must be done before links) |
|---|
| 410 | |
|---|
| 411 | * then bracketed links, first regular then reference-style |
|---|
| 412 | |
|---|
| 413 | * finally we apply strong and emphasis |
|---|
| 414 | """ |
|---|
| 415 | |
|---|
# Building blocks for the inline patterns. BasePattern wraps each of these
# in "^(.*)" ... "(.*)$", so group 1 is the text before the match and the
# groups defined below start at 2.

NOBRACKET = r'[^\]\[]*'
# Bracketed text allowing a few levels of nested brackets. It contributes
# seven groups (the text plus six nested ones), which is why the patterns
# built on it read their URL / id from group 9.
BRK = ( r'\[('
        + (NOBRACKET + r'(\['+NOBRACKET)*6
        + (NOBRACKET+ r'\])*'+NOBRACKET)*6
        + NOBRACKET + r')\]' )

BACKTICK_RE = r'\`([^\`]*)\`'                    # `e= m*c^2`
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`'             # ``e=f("`")``
ESCAPE_RE = r'\\(.)'                             # \<
EMPHASIS_RE = r'\*([^\*]*)\*'                    # *emphasis*
STRONG_RE = r'\*\*(.*)\*\*'                      # **strong**
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'            # ***strong***

if SMART_EMPHASIS:
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'        # _emphasis_
else:
    EMPHASIS_2_RE = r'_([^_]*)_'                 # _emphasis_

STRONG_2_RE = r'__([^_]*)__'                     # __strong__
STRONG_EM_2_RE = r'___([^_]*)___'                # ___strong___

LINK_RE = BRK + r'\s*\(([^\)]*)\)'               # [text](url)
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
# Raw string like its siblings: same value, but no invalid "\s" escape.
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>'               # <me@example.com>
HTML_RE = r'(\<[^\>]*\>)'                        # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
|---|
| 447 | |
|---|
class BasePattern:
    """Base class of the built-in inline patterns.

    The given expression is wrapped so that it captures the whole block:
    group 1 is everything before the match and the last group everything
    after it; the expression's own groups start at 2.
    """

    def __init__(self, pattern):
        """@param pattern: the regular expression source for the inline
        construct itself, without the surrounding catch-all groups"""
        wrapper = "^(.*)%s(.*)$"
        self.pattern = pattern
        # DOTALL: the catch-all groups must also span newlines
        self.compiled_re = re.compile(wrapper % pattern, re.DOTALL)

    def getCompiledRegExp(self):
        """Return the compiled, whole-block regular expression."""
        return self.compiled_re
|---|
| 456 | |
|---|
class SimpleTextPattern(BasePattern):
    """Pattern whose match is replaced by its first captured group as
    plain text."""

    def handleMatch(self, m, doc):
        """Return a text node holding group 2 of the match."""
        captured = m.group(2)
        return doc.createTextNode(captured)
|---|
| 461 | |
|---|
class SimpleTagPattern(BasePattern):
    """Pattern that wraps the captured text in a single element."""

    def __init__(self, pattern, tag):
        """@param pattern: the regular expression source
        @param tag: name of the element to create, e.g. 'em'"""
        BasePattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc):
        """Return a <tag> element containing group 2 as text."""
        wrapper = doc.createElement(self.tag)
        content = doc.createTextNode(m.group(2))
        wrapper.appendChild(content)
        return wrapper
|---|
| 472 | |
|---|
class BacktickPattern(BasePattern):
    """Pattern for `code spans`; produces a <code> element."""

    def __init__(self, pattern):
        BasePattern.__init__(self, pattern)
        self.tag = "code"

    def handleMatch(self, m, doc):
        """Return a <code> element holding the stripped, escaped span."""
        el = doc.createElement(self.tag)
        text = m.group(2).strip()
        # Ampersands inside code must be shown literally, and
        # Document.normalizeEntities() does not escape them, so do it
        # here. (Replacing "&" by "&" was a no-op.)
        text = text.replace("&", "&amp;")
        el.appendChild(doc.createTextNode(text))
        return el
|---|
| 485 | |
|---|
| 486 | |
|---|
class DoubleTagPattern(SimpleTagPattern):
    """Pattern that wraps the captured text in two nested elements.

    self.tag holds both names separated by a comma, outer one first
    (e.g. 'strong,em').
    """

    def handleMatch(self, m, doc):
        """Return <outer><inner>group 2</inner></outer>."""
        outer_name, inner_name = self.tag.split(",")
        outer = doc.createElement(outer_name)
        inner = doc.createElement(inner_name)
        outer.appendChild(inner)
        inner.appendChild(doc.createTextNode(m.group(2)))
        return outer
|---|
| 496 | |
|---|
| 497 | |
|---|
class HtmlPattern(BasePattern):
    """Pattern that swaps matched markup for a stash placeholder.

    The "stash" attribute (an object with a store(html) method) must be
    assigned before handleMatch() is called.
    """

    def handleMatch(self, m, doc):
        """Stash group 2 and return a text node with its placeholder."""
        raw_html = m.group(2)
        return doc.createTextNode(self.stash.store(raw_html))
|---|
| 503 | |
|---|
| 504 | |
|---|
class LinkPattern(BasePattern):
    """Pattern for inline links: [text](url "title")."""

    def handleMatch(self, m, doc):
        """Return an <a> element for the match.

        Group 2 is the link text; group 9 is the content of the
        parentheses, i.e. nothing, an href, or an href and a title.
        """
        link = doc.createElement('a')
        link.appendChild(doc.createTextNode(m.group(2)))

        words = m.group(9).split()
        href = ""
        if words:
            href = words[0]
        link.setAttribute('href', href)

        title_words = words[1:]
        if title_words:
            # everything after the href is the (possibly quoted) title
            title = dequote(" ".join(title_words).strip())
            link.setAttribute('title', title)
        return link
|---|
| 522 | |
|---|
| 523 | |
|---|
class ImagePattern(BasePattern):
    """Pattern for inline images: ![alt](src "title")."""

    def handleMatch(self, m, doc):
        """Return an <img> element for the match.

        Group 2 is the alt text; group 9 is the content of the
        parentheses, i.e. nothing, a src, or a src and a title.
        """
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        # Empty parentheses ("![alt]()") yield no parts; fall back to an
        # empty src the way LinkPattern does for href, instead of
        # raising IndexError.
        if src_parts:
            el.setAttribute('src', src_parts[0])
        else:
            el.setAttribute('src', "")
        if len(src_parts) > 1:
            el.setAttribute('title', dequote(" ".join(src_parts[1:])))
        if ENABLE_ATTRIBUTES:
            # Temporarily attach the alt text as a child so that any
            # {@name=value} markers in it land on the <img> element.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el
|---|
| 542 | |
|---|
class ReferencePattern(BasePattern):
    """Pattern for reference-style links: [text][id].

    The "references" dictionary attribute (id -> (href, title)) must be
    assigned before handleMatch() is called.
    """

    def handleMatch(self, m, doc):
        """Return the element for a defined reference, or None.

        Group 2 is the bracketed text and group 9 the reference id.
        """
        if m.group(9):
            ref_id = m.group(9).lower()
        else:
            # if we got something like "[Google][]"
            # we'll use "google" as the id
            ref_id = m.group(2).lower()
        if ref_id not in self.references:  # ignore undefined refs
            return None
        href, title = self.references[ref_id]
        text = m.group(2)
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        """Build the <a> element; subclasses override this hook."""
        el = doc.createElement('a')
        el.setAttribute('href', href)
        if title:
            el.setAttribute('title', title)
        el.appendChild(doc.createTextNode(text))
        return el
|---|
| 565 | |
|---|
| 566 | |
|---|
class ImageReferencePattern(ReferencePattern):
    """Pattern for reference-style images: ![alt][id]."""

    def makeTag(self, href, title, text, doc):
        """Build an <img> element with src, optional title, and alt."""
        image = doc.createElement('img')
        image.setAttribute('src', href)
        if title:
            image.setAttribute('title', title)
        image.setAttribute('alt', text)
        return image
|---|
| 576 | |
|---|
| 577 | |
|---|
class AutolinkPattern(BasePattern):
    """Pattern for automatic links: <http://example.com>."""

    def handleMatch(self, m, doc):
        """Return an <a> element whose href and text are both the URL."""
        url = m.group(2)
        link = doc.createElement('a')
        link.setAttribute('href', url)
        link.appendChild(doc.createTextNode(url))
        return link
|---|
| 585 | |
|---|
class AutomailPattern(BasePattern):
    """Pattern for automatic e-mail links: <me@example.com>."""

    def handleMatch(self, m, doc):
        """Return an <a> element whose address is written entirely as
        numeric character references, in both the text and the href."""
        scheme = "mailto:"
        link = doc.createElement('a')

        address = m.group(2)
        if address.startswith(scheme):
            address = address[len(scheme):]

        # visible text: one entity-reference node per character
        for char in address:
            link.appendChild(doc.createEntityReference("#%d" % ord(char)))

        # href: the same encoding applied to "mailto:" plus the address
        encoded = ['&#%d;' % ord(char) for char in scheme + address]
        link.setAttribute('href', "".join(encoded))
        return link
|---|
| 600 | |
|---|
# Shared instances of the inline patterns, one per regular expression
# defined above.

# plain-text patterns: the match is replaced by its captured text
ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)

# patterns producing a single element (<code>, <strong>, <em>)
BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')

# patterns producing <strong><em>...</em></strong>
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# links and images; the two reference patterns read self.references,
# which is not set here and has to be assigned before they are used
LINK_PATTERN = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)

# inline HTML and entities; both read self.stash, which is not set here
# and has to be assigned before they are used
HTML_PATTERN = HtmlPattern(HTML_RE)
ENTITY_PATTERN = HtmlPattern(ENTITY_RE)

AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
|---|
| 625 | |
|---|
| 626 | |
|---|
| 627 | """ |
|---|
| 628 | ====================================================================== |
|---|
| 629 | ========================== POST-PROCESSORS =========================== |
|---|
| 630 | ====================================================================== |
|---|
| 631 | |
|---|
| 632 | Markdown also allows post-processors, which are similar to |
|---|
| 633 | preprocessors in that they need to implement a "run" method. Unlike |
|---|
| 634 | pre-processors, they take a NanoDom document as a parameter and work |
|---|
| 635 | with that. |
|---|
| 636 | # |
|---|
| 637 | There are currently no standard post-processors, but the footnote |
|---|
| 638 | extension below uses one. |
|---|
| 639 | """ |
|---|
| 640 | """ |
|---|
| 641 | ====================================================================== |
|---|
| 642 | ========================== MISC AUXILIARY CLASSES ==================== |
|---|
| 643 | ====================================================================== |
|---|
| 644 | """ |
|---|
| 645 | |
|---|
class HtmlStash:
    """Keeps the HTML segments that are pulled out of the text early on
    and stand in the document as placeholders."""

    def __init__(self):
        self.rawHtmlBlocks = []
        self.html_counter = 0  # number of segments stored so far

    def store(self, html):
        """Remember an HTML segment so it can be put back later.

        @param html: an html segment
        @returns : the placeholder string to insert into the document
                   in place of the segment """
        index = self.html_counter
        self.rawHtmlBlocks.append(html)
        self.html_counter = index + 1
        return HTML_PLACEHOLDER % index
|---|
| 665 | |
|---|
| 666 | |
|---|
class BlockGuru :
    """Helpers for finding where an indented ("tabbed") block ends."""

    def _findHead(self, lines, fn, allowBlank=0) :

        """Functional magic to help determine boundaries of indented
           blocks.

           Consumes lines from the start of the list for as long as fn
           returns a true value for them and collects those values.

           @param lines: an array of strings
           @param fn: a function that returns a substring of a string
                      if the string matches the necessary criteria
           @param allowBlank: specifies whether it's ok to have blank
                              lines between matching functions
           @returns: a list of processed items and the unused
                     remainder of the original list"""

        items = []
        item = -1

        i = 0 # to keep track of where we are

        for line in lines :

            # Without allowBlank the first blank line ends the block; the
            # blank line itself stays in the remainder.
            if not line.strip() and not allowBlank:
                return items, lines[i:]

            if not line.strip() and allowBlank:
                # If we see a blank line, this _might_ be the end
                i += 1

                # Find the next non-blank line
                for j in range(i, len(lines)) :
                    if lines[j].strip() :
                        next = lines[j]
                        break
                else :
                    # There is no more text => this is the end
                    break

                # Check if the next non-blank line is still a part of the list

                part = fn(next)

                if part :
                    # the blank line belongs to the block; keep it as ""
                    items.append("")
                    continue
                else :
                    # i already points past the blank line, so the
                    # remainder starts after it
                    break # found end of the list

            part = fn(line)

            if part :
                items.append(part)
                i += 1
                continue
            else :
                # first non-matching line: it starts the remainder
                return items, lines[i:]
        else :
            # reached only when every line was consumed without a break;
            # the remainder returned below is then empty
            i += 1

        return items, lines[i:]


    def detabbed_fn(self, line) :
        """ An auxiliary method to be passed to _findHead

            Returns group 4 of the 'tabbed' pattern when the line matches
            it, else None.
            NOTE(review): the pattern is defined outside the reviewed
            text; group 4 is presumably the line without its leading
            indent -- confirm. """
        m = RE.regExp['tabbed'].match(line)
        if m:
            return m.group(4)
        else :
            return None


    def detectTabbed(self, lines) :
        """Split lines into the leading tabbed block (processed through
           detabbed_fn, blank lines allowed inside it) and the rest.

           @returns: the (items, remainder) pair produced by _findHead """

        return self._findHead(lines, self.detabbed_fn,
                              allowBlank = 1)
|---|
| 742 | |
|---|
| 743 | |
|---|
| 744 | def print_error(string): |
|---|
| 745 | """Print an error string to stderr""" |
|---|
| 746 | sys.stderr.write(string +' |
|---|