# Pylighter - monochromacy.net
# HTML syntax highlighting for Python
# based on the MoinMoin Python Source Parser - moinmo.in
# compatible with the Manoli highlighting styles - www.manoli.net/csharpformat/

import keyword
import sys
import token
import tokenize

try:  # Python 2
    from StringIO import StringIO as _StringIO
    from cgi import escape as _escape
except ImportError:  # Python 3
    from io import StringIO as _StringIO
    from html import escape as _escape

# Pseudo token type used to tag Python keywords (the tokenizer reports them
# as plain NAME tokens).
_KEYWORD = token.NT_OFFSET + 1

# Token type -> CSS class. Token types that are absent are written unstyled.
_classes = {
    token.NUMBER:       'str',
    token.OP:           'op',
    token.STRING:       'str',
    tokenize.COMMENT:   'rem',
    token.ERRORTOKEN:   'kwrd',
    _KEYWORD:           'kwrd',
}

# Layout-only tokens. They are skipped; any indentation they cover is
# re-emitted by the whitespace-gap logic when the next real token arrives.
_LAYOUT_TOKENS = (token.INDENT, token.DEDENT, token.ENDMARKER)

# NOTE(review): the HTML inside the original string literals was lost (the
# tags had been stripped, which also left the span writer as `'' % cssClass`,
# a TypeError). The markup below is reconstructed from the Manoli
# csharpformat conventions that the CSS class names above follow -- verify it
# against the upstream file. The stylesheet href in particular is a
# placeholder.
_STYLESHEET_HREF = 'csharp.css'
_PREAMBLE = (
    '<html>\n'
    '<head>\n'
    '<link rel="stylesheet" type="text/css" href="%s" />\n'
    '</head>\n'
    '<body>\n'
) % _STYLESHEET_HREF
_POSTAMBLE = (
    '</body>\n'
    '</html>\n'
)
_BLOCK_OPEN = '<div class="csharpcode">\n'
_BLOCK_CLOSE = '</div>\n'
# Line number right-aligned in four columns, as in the original
# ' ' * (4 - len(str(n))) padding.
_LINE_OPEN = '<pre><span class="lnum">{0:>4}:   </span>'
_LINE_CLOSE = '</pre>\n'
_SPAN_OPEN = '<span class="%s">'
_SPAN_CLOSE = '</span>'


class Parser(object):
    """ Send colored python source.
    """

    def __init__(self, raw, includePreamble, out = sys.stdout):
        """ Store the source text.

            raw             -- Python source text; tabs are expanded and
                               surrounding whitespace is stripped
            includePreamble -- when true, wrap the output in a complete
                               HTML document
            out             -- object with a write() method that receives
                               the generated HTML
        """
        self.raw = raw.expandtabs().strip()
        self.includePreamble = includePreamble
        self.out = out

    def format(self, formatter, form):
        """ Parse and send the colored source.

            Neither `formatter` nor `form` is used; presumably they are kept
            for call compatibility with the MoinMoin parser interface.
            Errors raised by the tokenizer propagate to the caller.
        """
        write = self.out.write
        if self.includePreamble:
            write(_PREAMBLE)

        self.lineNum = 1
        self.newlineRequired = True
        self.colPos = 0

        write(_BLOCK_OPEN)
        readline = _StringIO(self.raw).readline
        for toktype, toktext, start, end, line in tokenize.generate_tokens(readline):
            self(toktype, toktext, start, end, line)
        # The stripped source has no trailing newline, so whether the last
        # line is still open depends on the tokenizer emitting a final
        # NEWLINE; close it only when it really is open.
        if not self.newlineRequired:
            write(_LINE_CLOSE)
        write(_BLOCK_CLOSE)

        if self.includePreamble:
            write(_POSTAMBLE)

    def __call__(self, toktype, toktext, start, end, line):
        """ Token handler.

            Takes the five fields of a tokenize token: type, text,
            (row, col) start, (row, col) end and the physical source line.

            NOTE(review): a backslash line continuation produces no
            NEWLINE/NL token, so such lines appear to be joined in the
            output -- confirm and handle if that matters.
        """
        scol = start[1]
        ecol = end[1]

        if toktype in _LAYOUT_TOKENS:
            return

        self._open_line()

        # Handle newlines
        if toktype in (token.NEWLINE, tokenize.NL):
            self._close_line()
            return

        # Rewrite stripped out whitespace
        if scol > self.colPos:
            self.out.write(line[self.colPos:scol])

        # Do some token type wrangling
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD

        if '\n' in toktext:
            # A token spanning several physical lines (e.g. a triple-quoted
            # string): emit one styled fragment per output line.
            pieces = toktext.split('\n')
            last = len(pieces) - 1
            for index, piece in enumerate(pieces):
                self._open_line()
                self._write_token(piece, toktype)
                if index < last:
                    self._close_line()
        else:
            self._write_token(line[scol:ecol], toktype)

        # Update the last character position so we can tell when whitespace
        # is dropped
        self.colPos = ecol

    def _open_line(self):
        """ Write the line number prefix if no output line is open. """
        if self.newlineRequired:
            self.out.write(_LINE_OPEN.format(self.lineNum))
            self.newlineRequired = False

    def _close_line(self):
        """ Terminate the current output line and advance the counters. """
        self.out.write(_LINE_CLOSE)
        self.lineNum = self.lineNum + 1
        self.colPos = 0
        self.newlineRequired = True

    def _write_token(self, text, toktype):
        """ Write HTML-escaped text, wrapped in a span if the type has a style. """
        cssClass = _classes.get(toktype)
        # quote=False matches the default of the original cgi.escape call
        escaped = _escape(text, False)
        if cssClass is not None:
            self.out.write(_SPAN_OPEN % cssClass)
            self.out.write(escaped)
            self.out.write(_SPAN_CLOSE)
        else:
            self.out.write(escaped)


def _main(argv):
    """ Highlight the file named by argv[1] into '<name>.html' and open it.

        Returns a process exit status.
    """
    import os
    import webbrowser
    try:  # Python 3
        from urllib.request import pathname2url
    except ImportError:  # Python 2
        from urllib import pathname2url

    if len(argv) < 2:
        sys.stderr.write('usage: %s SOURCE_FILE\n' % argv[0])
        return 2

    with open(argv[1]) as infile:
        source = infile.read()

    outfile = argv[1] + '.html'
    # Close the output before launching the viewer so it is fully flushed.
    with open(outfile, 'wt') as out:
        Parser(source, True, out).format(None, None)

    # Use the platform's browser rather than building a shell command line
    # from the file name.
    webbrowser.open('file:' + pathname2url(os.path.abspath(outfile)))
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))