#!/usr/bin/env python
# Copyright (c) 2014-2015 Benjamin Althues <benjamin@babab.nl>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import fileinput
# Module metadata.
# Markup format used by the docstrings in this module (PEP 258 convention).
__docformat__ = 'restructuredtext'
__author__ = "Benjamin Althues"
__copyright__ = "Copyright (C) 2014-2015 Benjamin Althues"
# NOTE(review): presumably laid out like sys.version_info, i.e.
# (major, minor, micro, releaselevel, serial) -- confirm.  This tuple and
# __version__ below are two hand-maintained literals; keep them in sync.
__version_info__ = (0, 1, 1, 'beta', 0)
__version__ = '0.1.1'
## Setting defaults ##########################################################
# Default for the ``input_indent`` argument of Parser and of the string(),
# file() and stdin() shortcuts: the number of leading characters that make
# up one indentation level in the source document.
DEFAULT_INPUT_INDENT = 2
'''The standard value of tuhinga's indentation is 2 spaces'''
# Default for the ``output_indent`` argument of LexerXML and of the shortcut
# functions: the number of spaces written per nesting level.  LexerXML only
# appends a newline after each piece of output when this value is greater
# than -1.
DEFAULT_OUTPUT_INDENT = 2
'''The output can be set as a negative value to create condensed one liners'''
## Default mapper for LexerXML ###############################################
# Elements that never take an end tag and need no further mapping.
_VOID_ELEMENTS = (
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'keygen', 'param',
    'source', 'track', 'wbr',
)

# Every ``input-<type>`` shortcut expands to ``<input type="<type>">``.
_INPUT_TYPES = (
    'button', 'checkbox', 'color', 'date', 'datetime', 'datetime-local',
    'email', 'file', 'hidden', 'image', 'month', 'number', 'password',
    'radio', 'range', 'reset', 'search', 'submit', 'tel', 'text', 'time',
    'url', 'week',
)


def _buildHtml5Rules():
    '''Assemble the html5 rule table; every rule is its own dict object'''
    rules = {}
    for name in _VOID_ELEMENTS:
        rules[name] = {'v': True}

    rules['css'] = {
        'v': True, 'e': 'link', 'c': 'href', 'h': 'rel="stylesheet"',
    }
    rules['input'] = {'v': True, 'c': 'value'}
    for input_type in _INPUT_TYPES:
        rules['input-' + input_type] = {
            'v': True,
            'e': 'input',
            'c': 'value',
            'h': 'type="{}"'.format(input_type),
        }

    rules['js'] = {'e': 'script', 'c': 'src', 'h': 'type="text/javascript"'}
    rules['link'] = {'v': True, 'c': 'href'}
    rules['meta'] = {'v': True, 'c': 'content'}
    rules['meta-charset'] = {'v': True, 'e': 'meta', 'c': 'charset'}
    rules['script-src'] = {'e': 'script', 'c': 'src'}
    return rules


mapper = {
    'html5': _buildHtml5Rules(),
}
'''Per-doctype rules that translate tuhinga element names to markup

Each rule is a dict that may contain the following keys:

- 'v': True for a void element such as <meta>, which gets no end tag.
       Default: False
- 'e': name of the element to write. Default: the rule's own key
- 'c': destination of the node's text content, see below. Default: '>'
- 'h': literal attribute text added to the start tag. Default: none

Possible destinations for the content ('c'):

- '>': write the content right after the start tag (default)
- '-': drop the content
- any other string: write the content as the value of the attribute
  with that name
'''
## Parser and Lexer objects ##################################################
class LexerError(Exception):
    '''Raised by LexerXML when the parsed node list cannot be compiled'''
class Parser:
    '''Parse a tuhinga doc and create nodes to be processed with a lexer

    Every non-empty, non-comment line becomes a node dictionary.  The tree
    structure is recorded as a flat list in ``self.nodes`` whose entries are
    ``(flag, data)`` tuples: flag 1 marks the start of a node, flag 0 marks
    the closing of a node that was still open when a shallower line (or
    ``close()``) was reached.
    '''

    def __init__(self, input_indent=DEFAULT_INPUT_INDENT):
        '''Handle args and initialize instance variables

        :param input_indent: number of leading characters per indent level
        '''
        self.input_indent = input_indent
        self.latest_indentlvl = 0
        self.lineno = 0
        self.current_indentlvl = 0
        self.nodes = []
        # One slot per indentation level holding the node that is currently
        # open at that level.  The list starts with 100 slots and is grown
        # on demand by _registerNode, so deeper nesting does not fail.
        self.parsed = [None] * 100

    def string(self, string):
        '''Parse a complete tuhinga document as string'''
        for line in string.split('\n'):
            self.parseLine(line)
        return self.close()

    def file(self, filename):
        '''Parse a complete tuhinga document by filename'''
        with open(filename) as f:
            for line in f:
                self.parseLine(line)
        return self.close()

    def fileinput(self):
        '''Parse a complete tuhinga document with the fileinput module

        Lines are taken from ``fileinput.input()``, which reads the files
        named on the command line or stdin when there are none.
        '''
        # Inside a method body the bare name resolves to the imported
        # module, not to this method.
        for line in fileinput.input():
            self.parseLine(line)
        return self.close()

    def close(self):
        '''Close all open nodes'''
        self._closeNodes(0)
        return self

    def parseLine(self, line):
        '''Parse a single line of tuhinga markup

        Make sure to run close() after the last call to parseLine.

        The first word of the line is the identifier
        (``element#id.class1.class2``); every following word that starts
        with ':' is an argument, all other words form the content.

        :param line: one line of markup, with or without trailing newline
        :returns: the parser itself, so calls can be chained
        '''
        self.lineno += 1

        stripped = line.lstrip()
        # Floor division keeps the level an integer without a float detour
        indentlvl = (len(line) - len(stripped)) // self.input_indent
        splitted = stripped.split()

        # Skip empty lines and comment lines
        if not splitted or splitted[0].startswith(';'):
            return self

        # parse element, id and classes
        element, _id, _class = self._parseIdentifier(splitted[0])

        # parse content and arguments
        # If a word starts with ':' and is not an argument,
        # it should be escaped '\:'
        content = []
        args = []
        for word in splitted[1:]:
            if word.startswith(':'):
                args.append(word[1:])
            else:
                content.append(word)

        data = {
            'indentlvl': indentlvl,
            'element': element,
            'id': _id,
            'class': _class,
            'arguments': args,
            'content': ' '.join(content),
            'line': self.lineno,
            'splitted': splitted,
        }

        # register node to handle the tree structure
        self._registerNode(indentlvl, data)
        return self

    @staticmethod
    def _parseIdentifier(identifier):
        '''Split an identifier into (element, id, list of classes)

        The text before the first '#' or '.' is the element name; it
        defaults to 'div' when the identifier starts with one of those
        markers.  ``#name`` and ``.name`` parts may follow in any order.
        When several ids are given the first one is used; empty names are
        ignored.
        '''
        # Cut the identifier into (marker, name) pairs, where marker is ''
        # for the element name, '#' for an id and '.' for a class.
        tokens = []
        marker, name = '', ''
        for char in identifier:
            if char in '#.':
                tokens.append((marker, name))
                marker, name = char, ''
            else:
                name += char
        tokens.append((marker, name))

        element = tokens[0][1] or 'div'
        _id = None
        classes = []
        for marker, name in tokens[1:]:
            if not name:
                continue
            if marker == '.':
                classes.append(name)
            elif _id is None:
                _id = name
        return element, _id, classes

    def _registerNode(self, indentlvl, data):
        '''Record the start of a node and close nodes it dedents out of'''
        if indentlvl < self.current_indentlvl:
            self._closeNodes(indentlvl)

        # Grow the per-level table when the document nests deeper than the
        # number of slots available so far.
        missing = indentlvl + 1 - len(self.parsed)
        if missing > 0:
            self.parsed.extend([None] * missing)

        self.parsed[indentlvl] = data
        self.nodes.append((1, data))
        self.latest_indentlvl = indentlvl
        self.current_indentlvl = indentlvl

    def _closeNodes(self, indentlvl):
        '''Append close entries for all open nodes at indentlvl or deeper'''
        # The most recently registered node has no children, so it gets no
        # close entry of its own.
        self.parsed[self.latest_indentlvl] = None
        # Deepest level first, so close entries come out innermost-first
        for i in range(len(self.parsed) - 1, indentlvl - 1, -1):
            if self.parsed[i]:
                self.nodes.append((0, self.parsed[i]))
                self.parsed[i] = None
class LexerXML:
    '''Lexical compilation of parsed nodes to XML markup

    The compiled markup is available as ``self.output`` once the object has
    been created.
    '''

    def __init__(self, parser, output_indent=DEFAULT_OUTPUT_INDENT):
        '''Object init is the only public method

        :param parser: object with a ``nodes`` list of ``(flag, data)``
                       tuples, flag 1 = start of node, flag 0 = end of node
        :param output_indent: spaces per nesting level; no newlines are
                              written when this is below 0
        :raises LexerError: when nodes with children are left unclosed
        '''
        self.output = ''
        self.doctype = 'html5'
        self.output_indent = output_indent

        nodes = parser.nodes
        unclosed = 0  # started nodes with children still waiting for an end
        for n, node in enumerate(nodes):
            flag, data = node[0], node[1]
            if flag == 1:
                if n + 1 < len(nodes):
                    next_lvl = nodes[n + 1][1]['indentlvl']
                else:
                    # Nothing follows the last node, so it has no children
                    next_lvl = data['indentlvl']
                if data['indentlvl'] < next_lvl:
                    unclosed += 1
                self._startNode(data=data, next_lvl=next_lvl)
            elif flag == 0:
                unclosed -= 1
                self._endNode(data=data)

        if unclosed > 0:
            raise LexerError('Markup Tree Error: parser did not '
                             'properly close all nodes')

    def _mapping(self, data):
        '''Return (element, void_elem, content_dest, extra_args) for a node

        Values come from the mapper rule of the node's element for the
        current doctype, falling back to the defaults for unmapped elements.
        '''
        rule = mapper[self.doctype].get(data['element'], {})
        return (
            rule.get('e', data['element']),
            rule.get('v', False),
            rule.get('c', '>'),
            rule.get('h', ''),
        )

    def _startNode(self, data, next_lvl):
        '''Write the start tag, the content and, for leaves, the end tag

        :param data: node dictionary as created by the parser
        :param next_lvl: indent level of the node that follows this one
        '''
        element, void_elem, content_dest, extra_args = self._mapping(data)
        has_children = data['indentlvl'] < next_lvl
        # Do not print a newline if output_indent setting <= -1
        newl = '\n' if self.output_indent > -1 else ''

        # hardcoded special elements
        if element == 'html5':
            out = '<!doctype html>{newl}{indent}<html>'.format(
                newl=newl,
                indent=self._indent(data['indentlvl'])
            )
            if not has_children:
                # No end node will follow for a childless root
                out += '</html>'
            self._addOutput(data['indentlvl'], out)
            return

        # '::' is a plain text node: content only, no tags
        is_element = element != '::'
        out = ''

        if is_element:
            out += '<' + element  # Begin start tag
            if data['id']:
                out += ' id="{}"'.format(data['id'])
            if data['class']:
                out += ' class="{}"'.format(' '.join(data['class']))
            if extra_args:
                out += ' {}'.format(extra_args)

            for a in data['arguments']:
                # Split on the first '=' only, so values may contain '='
                name, sep, value = a.partition('=')
                if not name:
                    continue
                if not sep:
                    # Value-less (boolean) argument: use the XML-safe
                    # long form, e.g. disabled="disabled"
                    value = name
                out += ' {}="{}"'.format(name, value)

            # Use content as argument according to mapping
            if data['content'] and content_dest not in ('>', '-'):
                out += ' {}="{}"'.format(content_dest, data['content'])

            out += '>'  # Close start tag

        # Add content, if any.
        # Properly align content depending on children nodes
        if data['content'] and content_dest == '>':
            if has_children:
                out += '{}{}{}'.format(newl, self._indent(next_lvl),
                                       data['content'])
            else:
                out += data['content']

        # close tag if node has no children nodes
        if is_element and not void_elem and not has_children:
            out += '</{}>'.format(element)

        self._addOutput(data['indentlvl'], out)

    def _endNode(self, data):
        '''Write the end tag of a node that had children'''
        element, void_elem, _content_dest, _extra_args = self._mapping(data)
        if element == 'html5':
            self._addOutput(data['indentlvl'], '</html>')
            return
        # Void elements and '::' text nodes have no end tag to write
        if void_elem or element == '::':
            return
        # Use the mapped name so the end tag matches the start tag
        self._addOutput(data['indentlvl'], '</{}>'.format(element))

    def _indent(self, indentlvl):
        '''Return the leading whitespace for the given nesting level'''
        return (' ' * self.output_indent) * indentlvl

    def _addOutput(self, indentlvl, contents):
        '''Append indented contents to the output'''
        # Do not print a newline if output_indent setting <= -1 and
        # unescape any special tokens
        newl = '\n' if self.output_indent > -1 else ''
        contents = contents.replace('\\:', ':')
        self.output += self._indent(indentlvl) + contents + newl
## Shortcut functions ########################################################
def string(string, input_indent=DEFAULT_INPUT_INDENT,
           output_indent=DEFAULT_OUTPUT_INDENT):
    '''Compile a tuhinga document given as a string and return the markup

    Runs the parser over the text and hands the closed parser to the lexer.
    '''
    source_parser = Parser(input_indent=input_indent)
    closed_parser = source_parser.string(string)
    lexer = LexerXML(closed_parser, output_indent=output_indent)
    return lexer.output
def file(filelocation, input_indent=DEFAULT_INPUT_INDENT,
         output_indent=DEFAULT_OUTPUT_INDENT):
    '''Compile the tuhinga document stored at filelocation; return markup

    Runs the parser over the file and hands the closed parser to the lexer.
    '''
    source_parser = Parser(input_indent=input_indent)
    closed_parser = source_parser.file(filelocation)
    lexer = LexerXML(closed_parser, output_indent=output_indent)
    return lexer.output
def stdin(input_indent=DEFAULT_INPUT_INDENT,
          output_indent=DEFAULT_OUTPUT_INDENT):
    '''Shortcut for parsing, lexing and mapping from stdin/fileinput

    Lines are read through ``fileinput.input()``, i.e. from the files named
    on the command line or from stdin when there are none.

    :param input_indent: number of leading characters per indent level
    :param output_indent: spaces per nesting level in the output
    :returns: the compiled markup as a string
    '''
    parser = Parser(input_indent=input_indent)
    # Feed the parser through its public parseLine()/close() API
    for line in fileinput.input():
        parser.parseLine(line)
    return LexerXML(parser.close(), output_indent=output_indent).output
## When invoked as script, read files or stdin ###############################
if __name__ == '__main__':
    # Print the markup compiled by stdin(); leave with a short goodbye
    # when the user interrupts the program.
    try:
        rendered = stdin()
        print(rendered)
    except KeyboardInterrupt:
        print('Bye')