HTML Codec July 26, 2006
A very simple and straightforward text/HTML codec. When encoding text, it escapes all
HTML delimiters (`<` becomes `&lt;`, etc.), so the encoded text can be safely viewed by an
HTML renderer (browser) or safely embedded into an HTML document. When decoding HTML, it
unescapes the entities back into plain text (so that `&gt;` becomes `>` again, etc.).
Code
def encode(input, tabsize=4):
    """Escape plain text so that it displays literally inside HTML.

    Args:
        input: The plain-text string to escape.
        tabsize: Number of spaces emitted after the ``&#9;`` marker for each
            tab character. ``decode`` must be called with the same value to
            turn the run back into a tab.

    Returns:
        The escaped string: ``&``, ``<``, ``>`` and ``"`` become character
        entities, newlines become ``<br/>``, tabs become ``&#9;`` followed
        by ``tabsize`` non-breaking spaces, and every space becomes
        ``&nbsp;``.
    """
    return (input
        # "&" must be escaped first, otherwise the ampersands introduced by
        # the entities below would themselves be escaped.
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("\n", "<br/>")
        # The plain spaces emitted here are turned into "&nbsp;" by the
        # final replacement, so a tab ends up as "&#9;" + "&nbsp;" * tabsize.
        .replace("\t", "&#9;" + " " * tabsize)
        .replace(" ", "&nbsp;"))
def decode(input, tabsize=4):
    """Turn text produced by ``encode`` back into plain text.

    Args:
        input: The escaped string.
        tabsize: Number of spaces that follow a ``&#9;`` marker; must match
            the value that was passed to ``encode``.

    Returns:
        The unescaped string: ``&nbsp;`` becomes a space, ``&#9;`` plus
        ``tabsize`` spaces becomes a tab, ``<br>`` and ``<br/>`` become
        newlines, and ``&quot;``, ``&lt;``, ``&gt;``, ``&amp;`` become the
        characters they stand for.
    """
    return (input
        # Spaces are restored first so that the tab pattern below can be
        # matched as "&#9;" followed by ordinary spaces.
        .replace("&nbsp;", " ")
        .replace("&#9;" + " " * tabsize, "\t")
        .replace("<br>", "\n")
        .replace("<br/>", "\n")
        .replace("&quot;", '"')
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        # "&amp;" must be unescaped last, otherwise an escaped literal such
        # as "&amp;lt;" would be decoded twice and come out as "<".
        .replace("&amp;", "&"))
#
# Codec APIs (if you place this file in lib/encodings, you can use
# str.encode("html") and str.decode("html"))
#
import codecs
class HtmlCodec(codecs.Codec):
    """Codec object that delegates to the module-level encode()/decode().

    The instance only stores the tab size that is forwarded to both
    functions.
    """

    def __init__(self, tabsize=4):
        """Remember *tabsize*, the number of spaces that represent one tab."""
        self.tabsize = tabsize

    def encode(self, input, errors="strict"):
        """Return ``(escaped_text, len(input))``.

        *errors* is accepted for compatibility with the ``codecs.Codec``
        signature but is not used.
        """
        return encode(input, self.tabsize), len(input)

    def decode(self, input, errors="strict"):
        """Return ``(plain_text, len(input))``.

        *errors* is accepted for compatibility with the ``codecs.Codec``
        signature but is not used.
        """
        # Forward the configured tab size so that text written by this
        # codec's encode() with a non-default tabsize decodes back to tabs.
        return decode(input, self.tabsize), len(input)
class StreamWriter(HtmlCodec, codecs.StreamWriter):
    """Stream writer that uses HtmlCodec.encode for the wrapped stream."""

    def __init__(self, stream, errors="strict", tabsize=4):
        """Wrap *stream*; *tabsize* is forwarded to HtmlCodec."""
        # HtmlCodec comes first in the MRO, so without this override its
        # __init__ would receive the stream as ``tabsize`` and the
        # codecs.StreamWriter state (stream, errors) would never be set.
        codecs.StreamWriter.__init__(self, stream, errors)
        HtmlCodec.__init__(self, tabsize)
class StreamReader(HtmlCodec, codecs.StreamReader):
    """Stream reader that uses HtmlCodec.decode for the wrapped stream."""

    def __init__(self, stream, errors="strict", tabsize=4):
        """Wrap *stream*; *tabsize* is forwarded to HtmlCodec."""
        # HtmlCodec comes first in the MRO, so without this override its
        # __init__ would receive the stream as ``tabsize`` and the
        # codecs.StreamReader state (stream, errors, buffers) would never
        # be set.
        codecs.StreamReader.__init__(self, stream, errors)
        HtmlCodec.__init__(self, tabsize)
def getregentry():
    """Build the codec registry entry for this module.

    Returns:
        A 4-tuple ``(encoder, decoder, stream_reader_class,
        stream_writer_class)`` where the encoder and decoder are the bound
        methods of a single default-configured ``HtmlCodec`` instance.
    """
    codec = HtmlCodec()
    encoder = codec.encode
    decoder = codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)
Example
This module can be used as a standalone module
>>> import htmlcodec
>>> htmlcodec.encode("blah > yada")
'blah&nbsp;&gt;&nbsp;yada'
Or you can place it in your interpreter’s lib/encodings directory (as html.py, for instance) and then use it through the codec API:
>>> '(blah > yada) & "wow"\ni eat babies'.encode("html")
'(blah&nbsp;&gt;&nbsp;yada)&nbsp;&amp;&nbsp;&quot;wow&quot;<br/>i&nbsp;eat&nbsp;babies'
>>> _.decode("html")
'(blah > yada) & "wow"\ni eat babies'