#coding: utf-8
import workflow
# Line-type codes stored under the 'type' key of each line record.
# The values are consecutive integers starting at 0, in this order.
(
    TYP_ROOT,
    TYP_HEAD,
    TYP_UNORDERED,
    TYP_ORDERED,
    TYP_BODY,
    TYP_QUOTE,
    TYP_CODE,
    TYP_RULE,
    TYP_EMPTY,
    TYP_OTHER,
) = range(10)

# Single characters the line parser compares against.
STR_HASH, STR_STAR, STR_DASH = '#', '*', '-'
STR_QUOTE, STR_SPACE, STR_DOT = '>', ' ', '.'
STR_TAB = '\t'
def main():
    """Convert the workflow input text into a tab-indented outline.

    Reads the text from workflow.get_input(), parses it into line records,
    annotates the records with nesting information, and hands the tabbed
    rendering to workflow.set_output().
    """
    str_Input = workflow.get_input()
    lst_Nest = addParentChild(parse_text(str_Input))
    workflow.set_output(make_tabbed(lst_Nest))
def make_tabbed(lst_Nodes):
    """Render line records as a single tab-indented string.

    The first record (the virtual root) is skipped. Every other record
    contributes its 'text' prefixed by a run of tab characters:
    'level' - 1 tabs for headers and list items, 'level' tabs for
    everything else. Nothing is inserted between records, so any line
    endings must already be part of each 'text'.

    Args:
        lst_Nodes: list of dicts with 'type', 'level' and 'text' keys
            (the first element only needs to exist).

    Returns:
        The concatenated outline text ('' when there is nothing after the
        first record).
    """
    # Types that are indented one step less than their nesting level.
    tpl_Shallow = (TYP_HEAD, TYP_ORDERED, TYP_UNORDERED)

    # Collect the pieces and join once, rather than re-joining the whole
    # accumulated string for every line.
    lst_Parts = []
    for dct_Node in lst_Nodes[1:]:
        n_Tabs = dct_Node['level']
        if dct_Node['type'] in tpl_Shallow:
            n_Tabs -= 1
        lst_Parts.append('\t' * n_Tabs)
        lst_Parts.append(dct_Node['text'])
    return ''.join(lst_Parts)
def addParentChild(lst_Lines):
    """Add nesting level, parent id and child ids to each line record.

    Every record gets a 'chiln' list (ids of its direct children). Every
    record except the root also gets:
      'level'    - nesting level: the hash depth for a header, 1 for an
                   empty line, otherwise the prevailing header depth plus
                   the line's own indent depth;
      'parentID' - id of the most recent record one level above.

    Args:
        lst_Lines: list of line records with 'id', 'type' and 'indents'
            keys, ordered so that lst_Lines[n]['id'] == n and the virtual
            root is first.

    Returns:
        The same list, with its records updated in place.
    """
    # lst_Parents[n] is the id of the latest node seen at level n.
    # The parent of Level 1 headers (and pre-header full-left lines) is
    # the virtual root.
    lst_Parents = [0]
    lng_CurrentHash = 0

    for dct_Line in lst_Lines:
        dct_Line['chiln'] = []
        lng_ID = dct_Line['id']
        vType = dct_Line['type']
        if vType == TYP_ROOT:
            continue

        # Get nesting level (prevailing header level + any tab indents).
        lng_Depth = dct_Line['indents'] + 1
        if vType == TYP_HEAD:
            lng_CurrentHash = lng_Depth
            lng_Level = lng_Depth
        elif vType == TYP_EMPTY:
            lng_Level = 1
        else:
            lng_Level = lng_CurrentHash + lng_Depth
        dct_Line['level'] = lng_Level

        # FIND current parent of this level. If levels were skipped, the
        # nearest recorded ancestor stands in for the missing ones.
        while len(lst_Parents) < lng_Level:
            lst_Parents.append(lst_Parents[-1])
        id_Parent = lst_Parents[lng_Level - 1]

        # RECORD parent-child relationship.
        dct_Line['parentID'] = id_Parent
        lst_Lines[id_Parent]['chiln'].append(lng_ID)

        # UPDATE current level-parent list. Anything recorded at deeper
        # levels belonged to the previous branch, so it is dropped here;
        # otherwise a later line that skips a level would be attached to
        # a stale node from an earlier section.
        lst_Parents[lng_Level:] = [lng_ID]

    return lst_Lines
def parse_text(str_Text):
    """Return a list of line records for the given text.

    The list starts with a virtual root record (id 0). Each line of
    str_Text then contributes one record built by parse_line(), which is
    given the 1-based line number as id, the character offset of the line
    start within str_Text, and the line itself with its line ending kept.

    Args:
        str_Text: the full text to split into lines.

    Returns:
        list of dicts: the root record followed by one record per line.
    """
    lst_Lines = [
        {'type': TYP_ROOT, 'indents': 0, 'id': 0, 'text': 'VIRTUAL_ROOT'}
    ]
    lng_Posn = 0
    # splitlines(True) keeps line endings, so the summed lengths track the
    # true offset of each line start.
    for i_Line, strLine in enumerate(str_Text.splitlines(True), 1):
        lst_Lines.append(parse_line(i_Line, lng_Posn, strLine))
        lng_Posn += len(strLine)
    return lst_Lines
def parse_line(int_ID, int_Posn, str_Line):
    """Classify one line of Markdown or tab-indented text.

    The line is scanned one character at a time. At each step the pair
    (previous character, current character) decides whether the prefix
    read so far still fits a header, bullet, numbered item, quote, code
    block or rule; the first character that settles the question ends the
    scan.
    NB code blocks are assumed to start with 4 spaces. All tabbed lines
    are read as nested.

    Args:
        int_ID: value stored as the record's 'id'.
        int_Posn: value stored as the record's 'posn'.
        str_Line: the line text; a trailing line ending, if present, is
            kept in the returned 'text'.

    Returns:
        dict with keys:
          'id', 'posn' - the arguments, passed through;
          'type'       - one of the TYP_* codes;
          'indents'    - number of hashes beyond the first (headers) or
                         number of leading tabs (tabbed body / list
                         lines); 0 in every other case;
          'text'       - the line without its recognised prefix for
                         headers, list items, quotes, code and tabbed
                         body lines; the left-stripped line for rules;
                         otherwise the whole line.
    """
    def make_record(lng_Kind, lng_Indents, str_Content):
        # Build the record dict shared by every exit path.
        return {'id': int_ID, 'indents': lng_Indents, 'type': lng_Kind,
                'posn': int_Posn, 'text': str_Content}

    if str_Line == '':
        return make_record(TYP_EMPTY, 0, '')

    tpl_Bullets = (STR_DASH, STR_STAR)
    lng_Level = 0    # leading tabs, or hashes after the first
    lng_Space = 0    # consecutive leading spaces seen
    bln_Bullet = 0   # dashes / stars seen so far
    str_Prev = ''    # previous character; '' only before the first one

    for i, c in enumerate(str_Line):
        # FIRST CHAR OF THE LINE ?
        if str_Prev == '':
            if c == STR_TAB:
                lng_Level = 1
            elif c in tpl_Bullets:
                bln_Bullet = 1
            elif c == STR_SPACE:
                lng_Space = 1
            elif c != STR_HASH and c != STR_QUOTE and not c.isdigit():
                # Not a possible prefix character: plain body text.
                return make_record(TYP_BODY, 0, str_Line)
        # FOLLOWING A HASH ?
        elif str_Prev == STR_HASH:
            if c == STR_HASH:
                lng_Level += 1
            elif c == STR_SPACE:
                return make_record(TYP_HEAD, lng_Level, str_Line[i + 1:])
            else:
                return make_record(TYP_OTHER, 0, str_Line)
        # FOLLOWING A BULLET ?
        elif str_Prev in tpl_Bullets:
            if c in tpl_Bullets:
                bln_Bullet += 1
                if bln_Bullet > 2:
                    return make_record(TYP_RULE, 0, str_Line.lstrip())
            elif c != STR_SPACE:
                return make_record(TYP_OTHER, 0, str_Line)
        # FOLLOWING A TAB ?
        elif str_Prev == STR_TAB:
            if c == STR_TAB:
                lng_Level += 1
            elif c in tpl_Bullets:
                bln_Bullet += 1
            elif not c.isdigit():
                return make_record(TYP_BODY, lng_Level, str_Line[i:])
        # FOLLOWING A DIGIT ?
        elif str_Prev.isdigit():
            if c != STR_DOT and not c.isdigit():
                return make_record(TYP_OTHER, 0, str_Line)
        # FOLLOWING A DOT ?
        elif str_Prev == STR_DOT:
            if c == STR_SPACE:
                return make_record(TYP_ORDERED, lng_Level, str_Line[i + 1:])
            else:
                return make_record(TYP_OTHER, 0, str_Line)
        # FOLLOWING A '>' ?
        elif str_Prev == STR_QUOTE:
            if c == STR_SPACE:
                return make_record(TYP_QUOTE, 0, str_Line[i + 1:])
            elif c != STR_QUOTE:
                return make_record(TYP_OTHER, 0, str_Line)
        # FOLLOWING A SPACE ?
        elif str_Prev == STR_SPACE:
            if c == STR_SPACE:
                lng_Space += 1
                if lng_Space > 3:
                    return make_record(TYP_CODE, 0, str_Line[i + 1:])
            elif c in tpl_Bullets:
                bln_Bullet += 1
                if bln_Bullet > 2:
                    return make_record(TYP_RULE, 0, str_Line.lstrip())
            elif bln_Bullet > 0:
                # First content character after "bullet + space".
                return make_record(TYP_UNORDERED, lng_Level, str_Line[i:])
            else:
                return make_record(TYP_BODY, 0, str_Line)
        else:
            return make_record(TYP_OTHER, 0, str_Line)
        str_Prev = c

    # The line ran out while still reading a possible prefix.
    return make_record(TYP_BODY, 0, str_Line)
# Script entry point: run the conversion as soon as this file is executed.
main()
# There are no comments yet.