root/branches/mbutscher/work/extensions/wikidPadParser/WikidPadParser.py @ 287

Revision 287, 80.0 kB (checked in by mbutscher, 3 years ago)

branches/mbutscher/work:
* Table option to set CSS class (Ross' rep.: part of

073653527c0ea0109c762007b27093f28cab2061)

Line 
1## import hotshot
2## _prof = hotshot.Profile("hotshot.prf")
3
4# Official parser plugin for wiki language "WikidPad default 2.0"
5# Last modified (format YYYY-MM-DD): 2011-07-21
6
7
8import locale, pprint, time, sys, string, traceback
9
10from textwrap import fill
11
12import wx
13
14import re    # from pwiki.rtlibRepl import re
15from pwiki.WikiExceptions import *
16from pwiki.StringOps import UPPERCASE, LOWERCASE, revStr, urlFromPathname, \
17        urlQuoteSpecific
18
19from pwiki.WikiDocument import WikiDocument
20from pwiki.OptionsDialog import PluginOptionsPanel
21
# NOTE(review): debugging redirection — all error output goes to stdout
sys.stderr = sys.stdout

# Use the system's default locale for collation/formatting
locale.setlocale(locale.LC_ALL, '')

from pwiki.WikiPyparsing import *


# Plugin descriptor read by the WikidPad plugin loader
WIKIDPAD_PLUGIN = (("WikiParser", 1),)

# Internal identifier and human-readable name of this wiki language
WIKI_LANGUAGE_NAME = "wikidpad_default_2_0"
WIKI_HR_LANGUAGE_NAME = u"WikidPad default 2.0"


LETTERS = UPPERCASE + LOWERCASE


# The specialized optimizer in WikiPyParsing can't handle automatic whitespace
# removing
ParserElement.setDefaultWhitespaceChars("")



# Flags used for every regex built by buildRegex() and for module-level REs
RE_FLAGS = re.DOTALL | re.UNICODE | re.MULTILINE
45
46
class IndentInfo(object):
    """Record describing one frame of the indentation stack.

    ``type`` names the kind of indented block ("normal", "ul" or "ol");
    ``level`` is the indentation depth, adjusted by callers after creation.
    """
    __slots__ = ("level", "type")

    def __init__(self, type):
        # Frames always start at depth 0; callers set ``level`` afterwards
        self.type = type
        self.level = 0
52
53
54
def buildRegex(regex, name=None, hideOnEmpty=False):
    """Wrap *regex* in a WikiPyparsing ``Regex`` element.

    If *name* is given it becomes both the results name and element name.
    With ``hideOnEmpty=True`` a zero-length match yields no node at all.
    """
    element = Regex(regex, RE_FLAGS)
    if name is not None:
        element = element.setResultsName(name).setName(name)

    if hideOnEmpty:
        element.setParseAction(actionHideOnEmpty)

    return element
65
# Matches only at end of input: succeeds where no further character follows
stringEnd = buildRegex(ur"(?!.)", "stringEnd")
67
68
69
def getFirstTerminalNode(t):
    """Return the last child of *t* if it is a TerminalNode, else None.

    NOTE(review): despite the name this inspects ``t[-1]`` (the *last*
    child); callers rely on exactly that behavior.
    """
    if t.getChildrenCount() == 0:
        return None

    candidate = t[-1]
    return candidate if isinstance(candidate, TerminalNode) else None
79
80
def actionHideOnEmpty(s, l, st, t):
    """Parse action: drop the node entirely when it matched zero characters."""
    return [] if t.strLength == 0 else None
84
85
def actionCutRightWhitespace(s, l, st, t):
    """
    Parse action: split trailing whitespace off the last terminal child
    of *t* into a separate syntax node appended after it.
    Returns the modified token *t*, or None if nothing changed (no
    terminal child, or the text consists only of whitespace).
    """
    lt = getFirstTerminalNode(t)
    if lt is None:
        return None

    txt = lt.getText()
    # Scan from the right for the last non-whitespace character
    for i in xrange(len(txt) - 1, -1, -1):
        if txt[i] not in (u"\t", u" ", u"\n", u"\r"):
            if i < len(txt) - 1:
                # Cut the whitespace tail off the terminal node ...
                lt.text = txt[:i+1]
                lt.recalcStrLength()

                # ... and re-append it as its own node right after it
                t2 = buildSyntaxNode(txt[i+1:], lt.pos + i + 1)
                t.append(t2)
            return t

    return None
103
104 
# Matches when only spaces/tabs remain up to line end (applied to reversed text)
_CHECK_LEFT_RE = re.compile(ur"[ \t]*$", RE_FLAGS)
106
107
def preActCheckNothingLeft(s, l, st, pe):
    """Fail the parse unless only spaces/tabs precede position *l* on its line.

    Real look-behind is poorly supported by the regex engine, so the
    reversed text (``st.revText``) is scanned forward from the mirrored
    position instead.
    """
    mirrored = len(s) - l
    if not _CHECK_LEFT_RE.match(st.revText, mirrored):
        raise ParseException(s, l, "left of block markup (e.g. table) not empty")
116
117
def validateNonEmpty(s, l, st, t):
    """Validate action: reject zero-length matches."""
    if t.strLength != 0:
        return
    raise ParseException(s, l, "matched token must not be empty")
121
122
123
124
def precTest(s, l, st, pe):
    # Debugging helper: dump location, parser state and element type
    print "--precTest", repr((l, st, type(pe)))
127
128
129
def createCheckNotIn(tokNames):
    """Build a parse-start action that forbids nesting inside *tokNames*.

    The returned callable raises ParseException if any of the given token
    names already appears on the state's name stack; the innermost entry
    (the token currently being started) is excluded from the check.
    """
    forbidden = frozenset(tokNames)

    def checkNoContain(s, l, st, pe):
        enclosing = st.nameStack[:-1]
        for name in forbidden:
            if name in enclosing:
                raise ParseException(s, l, "token '%s' is not allowed here" % name)

    return checkNoContain
140
141
def pseudoActionFindMarkup(s, l, st, t):
    """Rename a nonzero-length match to "plainText"; drop empty matches."""
    if t.strLength != 0:
        t.name = "plainText"
        return t
    return []
147
148
149
150
151
# Forward definition of normal content and content in table cells, headings, ...
content = Forward()
oneLineContent = Forward()

tableContentInCell = Forward().setResultsNameNoCopy("tableCell")
headingContent = Forward().setResultsNameNoCopy("headingContent")
todoContent = Forward().setResultsNameNoCopy("value")
titleContent = Forward().setResultsNameNoCopy("title")
characterAttributionContent = Forward()

# Optional run of spaces/tabs; hidden from the tree when it matched nothing
whitespace = buildRegex(ur"[ \t]*")
whitespace = whitespace.setParseAction(actionHideOnEmpty)
164
165
166# The mode appendix for URLs and tables
def actionModeAppendix(s, l, st, t):
    """Collect (key, data) pairs of all "entry" children into ``t.entries``."""
    t.entries = [
            (entry.findFlatByName("key").getText(),
                    entry.findFlatByName("data").getText())
            for entry in t.iterFlatByName("entry")]
    return t
177
178
179
# One "key + data" pair; neither part may contain ';', '|' or ']'
modeAppendixEntry = buildRegex(ur"(?![;\|\]])\S", "key") + buildRegex(ur"(?:(?![;\|\]])\S)*", "data")
modeAppendixEntry = modeAppendixEntry.setResultsNameNoCopy("entry")
# Entries separated by ';'; the collected pairs end up in node.entries
modeAppendix = modeAppendixEntry + ZeroOrMore(buildRegex(ur";") + modeAppendixEntry)
modeAppendix = modeAppendix.addParseAction(actionModeAppendix)
184
185
186
187
188# -------------------- Simple formatting --------------------
189
# Backslash escapes the following character
EscapePlainCharPAT = ur"\\"


escapedChar = buildRegex(EscapePlainCharPAT) + buildRegex(ur".", "plainText")

# Italics: _..._ with word-boundary anchors; nesting is forbidden
italicsStart = buildRegex(ur"\b_")
italicsStart = italicsStart.setParseStartAction(createCheckNotIn(("italics",)))

italicsEnd = buildRegex(ur"_\b")

italics = italicsStart + characterAttributionContent + italicsEnd
italics = italics.setResultsNameNoCopy("italics").setName("italics")

# Bold: *...* where the opening '*' must be followed by non-space; no nesting
boldStart = buildRegex(ur"\*(?=\S)")
boldStart = boldStart.setParseStartAction(createCheckNotIn(("bold",)))

boldEnd = buildRegex(ur"\*")

bold = boldStart + characterAttributionContent + boldEnd
bold = bold.setResultsNameNoCopy("bold").setName("bold")


# Inline script block <% ... %>
script = buildRegex(ur"<%") + buildRegex(ur".*?(?=%>)", "code") + \
        buildRegex(ur"%>")
script = script.setResultsNameNoCopy("script")

# Four or more dashes at line end; nothing may precede it on the line
horizontalLine = buildRegex(ur"----+[ \t]*$", "horizontalLine")\
        .setParseStartAction(preActCheckNothingLeft)


# -------------------- HTML --------------------

htmlTag = buildRegex(ur"</?[A-Za-z][A-Za-z0-9:]*(?:/| [^\n>]*)?>", "htmlTag")

htmlEntity = buildRegex(
        ur"&(?:[A-Za-z0-9]{2,10}|#[0-9]{1,10}|#x[0-9a-fA-F]{1,8});",
        "htmlEntity")
227
228
229
230# -------------------- Heading --------------------
231
def actionHeading(s, l, st, t):
    """Record heading depth and content node; fail on empty headings."""
    # Depth = number of leading '+' characters matched by the first child
    t.level = len(t[0].getText())
    t.contentNode = t.findFlatByName("headingContent")
    if t.contentNode is None:
        raise ParseException(s, l, "a heading needs content")
237
headingEnd = buildRegex(ur"\n")

# One to 15 '+' characters at line start give the heading level
heading = buildRegex(ur"^\+{1,15}(?!\+)") + Optional(buildRegex(ur" ")) + \
        headingContent + headingEnd
heading = heading.setResultsNameNoCopy("heading").setParseAction(actionHeading)
243
244
245
246# -------------------- Todo-Entry --------------------
247
def actionTodoEntry(s, l, st, t):
    """Extract key, delimiter and value node of a parsed todo entry."""
    key = t.findFlatByName("key").getString()
    valueNode = t.findFlatByName("value")

    t.key = key
    t.keyComponents = key.split(u".")
    t.delimiter = t.findFlatByName("todoDelimiter").getString()
    t.valueNode = valueNode
    t.todos = [(key, valueNode)]
254
255
256
# Known todo keywords, optionally followed by ".subkey" parts
todoKey = buildRegex(ur"\b(?:todo|done|wait|action|track|issue|"
        ur"question|project)(?:\.[^:\s]+)?", "key")
# todoKey = todoKey.setParseStartAction(preActCheckNothingLeft)

todoEnd = buildRegex(ur"\n|\||(?!.)")

# "key: value" optionally terminated by '|'
todoEntry = todoKey + buildRegex(ur":", "todoDelimiter") + todoContent + \
        Optional(buildRegex(ur"\|"))

todoEntry = todoEntry.setResultsNameNoCopy("todoEntry")\
        .setParseAction(actionTodoEntry)

# Only for LanguageHelper.parseTodoEntry()
todoAsWhole = todoEntry + stringEnd
271
272# -------------------- Indented text, (un)ordered list --------------------
273
def validateLessIndent(s, l, st, t):
    """Accept only indentation shallower than the innermost indent level."""
    current = st.dictStack["indentInfo"].level
    if t.strLength >= current:
        raise ParseException(s, l, "expected less indentation")
277
278
def validateMoreIndent(s, l, st, t):
    """Accept only indentation deeper than the innermost indent level."""
    current = st.dictStack["indentInfo"].level
    if t.strLength <= current:
        raise ParseException(s, l, "expected more indentation")
282
283
def validateEqualIndent(s, l, st, t):
    """Accept only indentation exactly equal to the innermost indent level."""
    current = st.dictStack["indentInfo"].level
    if t.strLength > current:
        raise ParseException(s, l, "expected equal indentation, but more found")
    if t.strLength < current:
        raise ParseException(s, l, "expected equal indentation, but less found")
289
290
def validateEquivalIndent(s, l, st, t):
    """Accept equal indentation; allow deeper only for non-"normal" frames."""
    info = st.dictStack["indentInfo"]
    if t.strLength > info.level and info.type == "normal":
        raise ParseException(s, l, "expected equival. indentation, but more found")
    if t.strLength < info.level:
        raise ParseException(s, l, "expected equival. indentation, but less found")
297
298
def validateInmostIndentNormal(s, l, st, t):
    """Fail unless the innermost indentation frame is of type "normal"."""
    if st.dictStack["indentInfo"].type != "normal":
        raise ParseException(s, l, 'Inmost indentation not "normal"')
302
303
def actionResetIndent(s, l, st, t):
    # Reset remembered indentation width ("lastIdentation" is the established,
    # misspelled key used consistently throughout this module)
    st.dictStack.getSubTopDict()["lastIdentation"] = 0
306
307
def actionIndent(s, l, st, t):
    # Remember the width of the indentation just matched
    st.dictStack.getSubTopDict()["lastIdentation"] = t.strLength
310
311
def actionListStartIndent(s, l, st, t):
    """Record list start indentation; must be deeper than the current level."""
    if t.strLength <= st.dictStack["indentInfo"].level:
        raise ParseException(s, l, "expected list start indentation, but less or equal found")

    return actionIndent(s, l, st, t)
317
318
def actionMoreIndent(s, l, st, t):
    """
    Called for more indentation before a general indented text.
    """
    newIdInfo = IndentInfo("normal")

    # actionIndent stores the matched width under "lastIdentation" ...
    result = actionIndent(s, l, st, t)

    # ... which then becomes the level of the new innermost indent frame
    newIdInfo.level = st.dictStack.getSubTopDict().get("lastIdentation", 0)
    st.dictStack.getSubTopDict()["indentInfo"] = newIdInfo

    return result
331
332
333
def preActNewLinesParagraph(s, l, st, pe):
    """Allow a paragraph break only outside <pre> and only in paragraph mode."""
    if "preHtmlTag" in st.nameStack:
        raise ParseException(s, l, "Newlines aren't paragraph inside <pre> tag")

    if not st.dictStack["wikiFormatDetails"].paragraphMode:
        raise ParseException(s, l, "Newlines are only paragraph in paragraph mode")
341
342
def preActNewLineLineBreak(s, l, st, pe):
    """Allow a hard line break only outside <pre> and not in paragraph mode."""
    if "preHtmlTag" in st.nameStack:
        raise ParseException(s, l, "Newline isn't line break inside <pre> tag")

    if st.dictStack["wikiFormatDetails"].paragraphMode:
        raise ParseException(s, l, "Newline isn't line break in paragraph mode")
350
351
def preActNewLineWhitespace(s, l, st, pe):
    """Treat a newline as plain whitespace only in paragraph mode, outside <pre>."""
    if "preHtmlTag" in st.nameStack:
        raise ParseException(s, l, "Newline isn't whitespace inside <pre> tag")

    if not st.dictStack["wikiFormatDetails"].paragraphMode:
        raise ParseException(s, l, "Newline is only whitespace in paragraph mode")
359
360
361
362
def preActUlPrepareStack(s, l, st, pe):
    """Push a fresh unordered-list ("ul") indentation frame onto the stack.

    The frame's level is the indentation captured by the most recent
    actionIndent/actionListStartIndent call ("lastIdentation" is the
    established, misspelled key used throughout this module).
    """
    # Fixed: removed unused local ``oldIdInfo`` (dead getSubTopDict() lookup)
    newIdInfo = IndentInfo("ul")
    newIdInfo.level = st.dictStack.get("lastIdentation", 0)
    st.dictStack["indentInfo"] = newIdInfo
369
def preActOlPrepareStack(s, l, st, pe):
    """Push a fresh ordered-list ("ol") indentation frame onto the stack.

    The frame's level is the indentation captured by the most recent
    actionIndent/actionListStartIndent call ("lastIdentation" is the
    established, misspelled key used throughout this module).
    """
    # Fixed: removed unused local ``oldIdInfo`` (dead getSubTopDict() lookup)
    newIdInfo = IndentInfo("ol")
    newIdInfo.level = st.dictStack.get("lastIdentation", 0)
    st.dictStack["indentInfo"] = newIdInfo
376
377
378
def inmostIndentChecker(typ):
    """Return a parse-start action enforcing innermost indent type *typ*."""
    def startAction(s, l, st, pe):
        actual = st.dictStack["indentInfo"].type
        if actual != typ:
            raise ParseException(s, l, 'Expected inmost indent type "%s"' % typ)

    return startAction
385
386
387
# Only an empty line
fakeIndentation = buildRegex(ur"^[ \t]+$")

newLine = buildRegex(ur"\n") + Optional(fakeIndentation)



# Two or more newlines form a paragraph break (paragraph mode only)
newLinesParagraph = newLine + OneOrMore(newLine)
newLinesParagraph = newLinesParagraph.setResultsNameNoCopy("newParagraph")\
        .setParseStartAction(preActNewLinesParagraph)\
        .setParseAction(actionResetIndent)


# A single newline is a hard break outside paragraph mode ...
newLineLineBreak = newLine
newLineLineBreak = newLineLineBreak.setResultsName("lineBreak")\
        .setParseStartAction(preActNewLineLineBreak)\
        .setParseAction(actionResetIndent)


# ... and mere whitespace inside paragraph mode
newLineWhitespace = newLine
newLineWhitespace = newLineWhitespace.setResultsName("whitespace")\
        .setParseStartAction(preActNewLineWhitespace)


# Line-start indentation, classified against the innermost indent level
moreIndentation = buildRegex(ur"^[ \t]*(?!\n)").setValidateAction(validateMoreIndent)
moreIndentation = moreIndentation.setParseStartAction(validateInmostIndentNormal).\
        setParseAction(actionMoreIndent).setName("moreIndentation")

equalIndentation = buildRegex(ur"^[ \t]*(?!\n)").setValidateAction(validateEqualIndent)
equalIndentation = equalIndentation.setParseAction(actionIndent).\
        setName("equalIndentation")

lessIndentation = buildRegex(ur"^[ \t]*(?!\n)").setValidateAction(validateLessIndent)
lessIndentation = lessIndentation.setParseAction(actionIndent).\
        setName("lessIndentation")

lessIndentOrEnd = stringEnd | lessIndentation
lessIndentOrEnd = lessIndentOrEnd.setName("lessIndentOrEnd")
426
427
equivalIndentation = buildRegex(ur"^[ \t]+(?!\n)").setValidateAction(validateEquivalIndent)
equivalIndentation = equivalIndentation.setParseAction(actionIndent).\
        setName("equivalIndentation")


# General indented text block, ended by shallower indentation or input end
indentedText = moreIndentation + content + FollowedBy(lessIndentOrEnd)
indentedText = indentedText.setResultsNameNoCopy("indentedText")


listStartIndentation  = buildRegex(ur"^[ \t]*")
listStartIndentation = listStartIndentation.\
        setParseAction(actionListStartIndent).setName("listStartIndentation")


bullet = buildRegex(ur"\*[ \t]", "bullet")

# Further bullet at the same indentation inside an "ul" frame
bulletEntry = equalIndentation.copy()\
        .addParseStartAction(inmostIndentChecker("ul")) + bullet  # + \



unorderedList = listStartIndentation + bullet + \
        (content + FollowedBy(lessIndentOrEnd))\
        .addParseStartAction(preActUlPrepareStack)
unorderedList = unorderedList.setResultsNameNoCopy("unorderedList")


# "1.", "2.3.", ... or the plain "#" bullet
number = buildRegex(ur"(?:\d+\.)*(\d+)\.[ \t]|#[ \t]", "number")

numberEntry = equalIndentation.copy()\
        .addParseStartAction(inmostIndentChecker("ol")) + number



orderedList = listStartIndentation + number + \
        (content + FollowedBy(lessIndentOrEnd))\
        .addParseStartAction(preActOlPrepareStack)
orderedList = orderedList.setResultsNameNoCopy("orderedList")
466
467
468
469
470# -------------------- Table --------------------
471
472
# Table terminator: ">>" alone on a line
tableEnd = buildRegex(ur"^[ \t]*>>[ \t]*(?:\n|$)")
newRow = buildRegex(ur"\n")

# Cell separators: '|' by default, tab when the "t" mode appendix was given
newCellBar = buildRegex(ur"\|")
newCellTab = buildRegex(ur"\t")
478
479
def chooseCellEnd(s, l, st, pe):
    """
    Choice resolver: select the cell separator element for the current
    table — tab if the table's mode appendix set "table.tabSeparated",
    otherwise the pipe character.
    """
    if st.dictStack.get("table.tabSeparated", False):
        return newCellTab
    else:
        return newCellBar
487
488
# Separator element chosen per table at parse time (see chooseCellEnd)
newCell = Choice([newCellBar, newCellTab], chooseCellEnd)


tableRow = tableContentInCell + ZeroOrMore(newCell + tableContentInCell)
tableRow = tableRow.setResultsNameNoCopy("tableRow").setParseAction(actionHideOnEmpty)
494
495
def actionTableModeAppendix(s, l, st, t):
    """
    Interpret the mode appendix of a table: "t" switches to tab-separated
    cells; "s"/"s=" sets the CSS class, stored in t.cssClass.
    """
    st.dictStack.getNamedDict("table")["table.tabSeparated"] = False
    t.cssClass = None
    for key, data in t.entries:
        if key == "t":
            st.dictStack.getNamedDict("table")["table.tabSeparated"] = True
        # Styles are designated by "s=". They will result in the css class
        # s being applied to all table elements. E. g. "s=foo" uses class
        # "foo". The '=' can be omitted, therefore "sfoo" does the same.
        elif key == "s":
            if data.startswith(u"="):
                data = data[1:]
            t.cssClass = data
509
510
tableModeAppendix = modeAppendix.setResultsName("tableModeAppendix").addParseAction(actionTableModeAppendix)

# "<<|" starts a table; it must be the first markup on its line
table = buildRegex(ur"<<\|").setParseStartAction(preActCheckNothingLeft) + \
        Optional(tableModeAppendix) + buildRegex(ur"[ \t]*\n") + tableRow + ZeroOrMore(newRow + tableRow) + tableEnd
table = table.setResultsNameNoCopy("table")
516
517
518
519# -------------------- Suppress highlighting and no export --------------------
520
# "<<" block: contents are kept as plain text (no wiki formatting applied)
suppressHighlightingMultipleLines = buildRegex(ur"<<[ \t]*\n")\
        .setParseStartAction(preActCheckNothingLeft) + \
        buildRegex(ur".*?(?=^[ \t]*>>[ \t]*(?:\n|$))", "plainText") + \
        buildRegex(ur"^[ \t]*>>[ \t]*(?:\n|$)")

# Single-line variant: << ... >>
suppressHighlightingSingleLine = buildRegex(ur"<<") + \
        buildRegex(ur"[^\n]*?(?=>>)", "plainText") + buildRegex(ur">>")

# suppressHighlighting = suppressHighlightingMultipleLines | suppressHighlightingSingleLine
530
531
532
533
534# -------------------- No export area--------------------
535
def actionNoExport(s, l, st, t):
    """Rename both no-export variants to the common name "noExport"."""
    # Change name to reduce work when interpreting
    t.name = "noExport"
539
540
541
noExportMultipleLinesEnd = buildRegex(ur"^[ \t]*>>[ \t]*(?:\n|$)")
noExportSingleLineEnd = buildRegex(ur">>")


# "<<hide" block: normal content, excluded from export; no nesting allowed
noExportMultipleLines = buildRegex(ur"<<hide[ \t]*\n")\
        .setParseStartAction(preActCheckNothingLeft,
        createCheckNotIn(("noExportMl", "noExportSl"))) + \
        content + noExportMultipleLinesEnd
noExportMultipleLines = noExportMultipleLines.setResultsNameNoCopy("noExportMl")\
        .setParseAction(actionNoExport)

noExportSingleLine = buildRegex(ur"<<hide[ \t]") + oneLineContent + \
        noExportSingleLineEnd
noExportSingleLine = noExportSingleLine.setResultsNameNoCopy("noExportSl")\
        .setParseStartAction(
        createCheckNotIn(("noExportMl", "noExportSl")))\
        .setParseAction(actionNoExport)
559
560
561
562# -------------------- Pre block --------------------
563
# "<<pre" block: contents are exported verbatim as preformatted text
preBlock = buildRegex(ur"<<pre[ \t]*\n")\
        .setParseStartAction(preActCheckNothingLeft) + \
        buildRegex(ur".*?(?=^[ \t]*>>[ \t]*(?:\n|$))", "preText") + \
        buildRegex(ur"^[ \t]*>>[ \t]*(?:\n|$)")
preBlock = preBlock.setResultsNameNoCopy("preBlock")
569
570
571# -------------------- Auto generated area --------------------
572# TODO
573
574# autoGeneratedArea = buildRegex(ur"<<[ \t]+")\
575#         .setParseStartAction(preActCheckNothingLeft) + \
576#         buildRegex(ur"[^\n]+", "expression") + \
577#         buildRegex(ur"\n") + buildRegex(ur".*?(?=>>)", "plainText") + \
578#         buildRegex(ur">>[ \t]*$").setParseStartAction(preActCheckNothingLeft)
579
580
581
582
583# -------------------- <pre> html tag --------------------
584
def actionPreHtmlTag(s, l, st, t):
    """
    Remove the node name so this doesn't become an own NTNode.
    """
    t.name = None
590
591
592
# <pre> ... </pre> HTML region; nesting of <pre> is forbidden
preHtmlStart = buildRegex(ur"<pre(?: [^\n>]*)?>", "htmlTag")\
        .setParseStartAction(createCheckNotIn(("preHtmlTag",)))

preHtmlEnd = buildRegex(ur"</pre(?: [^\n>]*)?>", "htmlTag")

preHtmlTag = preHtmlStart + content + preHtmlEnd
preHtmlTag = preHtmlTag.setResultsNameNoCopy("preHtmlTag")\
        .setParseAction(actionPreHtmlTag)
601
602
603
604# -------------------- Wikiwords and URLs --------------------
605
BracketStart = u"["
BracketStartPAT = ur"\["

BracketEnd = u"]"
BracketEndPAT = ur"\]"
# WikiWordNccPAT = ur"/?(?:/?[^\\/\[\]\|\000-\037=:;#!]+)+" # ur"[\w\-\_ \t]+"

# Single part of subpage path
WikiWordPathPartPAT = ur"(?!\.\.)[^\\/\[\]\|\000-\037=:;#!]+"
WikiPageNamePAT = WikiWordPathPartPAT + "(?:/" + WikiWordPathPartPAT + ")*"

# Begins with dotted path parts which mean to go upward in subpage path
WikiWordDottedPathPAT = ur"\.\.(/\.\.)*(?:/" + WikiWordPathPartPAT + ")*"
WikiWordNonDottedPathPAT = ur"/{0,2}" + WikiPageNamePAT

# Non-CamelCase wiki word: dotted (relative) or non-dotted (absolute) path
WikiWordNccPAT = WikiWordDottedPathPAT + ur"|" + WikiWordNonDottedPathPAT

WikiWordTitleStartPAT = ur"\|"
WikiWordAnchorStart = u"!"
WikiWordAnchorStartPAT = ur"!"

# Bracket start, escaped for reverse RE pattern (for autocompletion)
BracketStartRevPAT = ur"\["
# Bracket end, escaped for reverse RE pattern (for autocompletion)
BracketEndRevPAT = ur"\]"

WikiWordNccRevPAT = ur"[^\\\[\]\|\000-\037=:;#!]+?"  # ur"[\w\-\_ \t.]+?"



# CamelCase wiki word: mixed-case run or two-plus uppercase then lowercase
WikiWordCcPAT = (ur"(?:[" +
        UPPERCASE +
        ur"]+[" +
        LOWERCASE +
        ur"]+[" +
        UPPERCASE +
        ur"]+[" +
        LETTERS + string.digits +
        ur"]*|[" +
        UPPERCASE +
        ur"]{2,}[" +
        LOWERCASE +
        ur"]+)")


# Bare URL; trailing punctuation before whitespace/quote is excluded
UrlPAT = ur'(?:(?:https?|ftp|rel|wikirel)://|mailto:|Outlook:\S|wiki:/|file:/)'\
        ur'(?:(?![.,;:!?)]+(?:["\s]|$))[^"\s|\]<>])*'


# URL inside brackets; single spaces allowed, stops before " |" / " ]"
UrlInBracketsPAT = ur'(?:(?:https?|ftp|rel|wikirel)://|mailto:|Outlook:\S|wiki:/|file:/)'\
        ur'(?:(?![ \t]+[|\]])(?: |[^"\s|\]<>]))*'


bracketStart = buildRegex(BracketStartPAT)
bracketEnd = buildRegex(BracketEndPAT)


# "#x" escape inside an external search fragment; group 1 is the escaped char
UnescapeExternalFragmentRE   = re.compile(ur"#(.)",
                              re.DOTALL | re.UNICODE | re.MULTILINE)
665
666
def reThrough(matchobj):
    # re.sub replacement: pass group 1 (the escaped character) through unchanged
    return matchobj.group(1)
669
670
def actionSearchFragmentExtern(s, l, st, t):
    """
    Called to unescape external fragment of wikiword.
    """
    lt2 = getFirstTerminalNode(t)
    if lt2 is None:
        return None

    # Store the unescaped form ("#x" -> "x") alongside the raw text
    lt2.unescaped = UnescapeExternalFragmentRE.sub(ur"\1", lt2.text)
680
681
# Backslash-escaped character; group 1 is the escaped character itself
UnescapeStandardRE = re.compile(EscapePlainCharPAT + ur"(.)",
                              re.DOTALL | re.UNICODE | re.MULTILINE)
684
def actionSearchFragmentIntern(s, l, st, t):
    """Unescape the search fragment found inside a bracketed wikiword."""
    lt2 = getFirstTerminalNode(t)
    if lt2 is None:
        return None

    lt2.unescaped = UnescapeStandardRE.sub(ur"\1", lt2.text)
691
692
693
def resolveWikiWordLink(link, basePage):
    """
    If using subpages this is used to resolve a link to the right wiki word
    relative to basePage on which the link is placed.
    It returns the absolute link (page name).
    """
    # _TheHelper is the module-level helper object defined later in this file
    return _TheHelper.resolvePrefixSilenceAndWikiWordLink(link, basePage)[2]
701   
702
703
704
def actionWikiWordNcc(s, l, st, t):
    """Parse action for a bracketed (non-CamelCase) wiki word link.

    Resolves the word relative to the base page, rejects empty
    resolutions and blacklisted words, and records the title, search
    fragment and anchor sub-nodes on the token.
    """
    t.wikiWord = t.findFlatByName("word")
    if t.wikiWord is not None:
        wikiFormatDetails = st.dictStack["wikiFormatDetails"]

        # Fixed: reuse the local instead of a second dict stack lookup
        # (consistent with actionWikiWordCc)
        t.wikiWord = resolveWikiWordLink(t.wikiWord.getString(),
                wikiFormatDetails.basePage)

        if t.wikiWord == u"":
            raise ParseException(s, l, "Subpage resolution of wikiword failed")

        if t.wikiWord in wikiFormatDetails.wikiDocument.getNccWordBlacklist():
            raise ParseException(s, l, "Non-CamelCase word is in blacklist")

    t.titleNode = t.findFlatByName("title")

    t.fragmentNode = t.findFlatByName("searchFragment")
    if t.fragmentNode is not None:
        t.searchFragment = t.fragmentNode.unescaped
    else:
        t.searchFragment = None

    t.anchorLink = t.findFlatByName("anchorLink")
    if t.anchorLink is not None:
        t.anchorLink = t.anchorLink.getString()
730
731
732
def preActCheckWikiWordCcAllowed(s, l, st, pe):
    """Reject CamelCase wikiwords when the format settings forbid them."""
    try:
        wikiFormatDetails = st.dictStack["wikiFormatDetails"]

        if not wikiFormatDetails.withCamelCase:
            raise ParseException(s, l, "CamelCase words not allowed here")
    except KeyError:
        # No format details on the stack: allow the word
        pass
741
742
def actionWikiWordCc(s, l, st, t):
    """Parse action for a CamelCase wiki word; mirrors actionWikiWordNcc."""
    t.wikiWord = t.findFlatByName("word")
    if t.wikiWord is not None:
        wikiFormatDetails = st.dictStack["wikiFormatDetails"]

        t.wikiWord = resolveWikiWordLink(t.wikiWord.getString(),
                wikiFormatDetails.basePage)

        if t.wikiWord == u"":
            raise ParseException(s, l, "Subpage resolution of wikiword failed")

        try:
#             wikiFormatDetails = st.dictStack["wikiFormatDetails"]

            # NOTE(review): the KeyError guard looks like a leftover from
            # when the dict stack lookup above was inside this try block
            if t.wikiWord in wikiFormatDetails.wikiDocument.getCcWordBlacklist():
                raise ParseException(s, l, "CamelCase word is in blacklist")
        except KeyError:
            pass

    # CamelCase words never carry a title part
    t.titleNode = None

    t.fragmentNode = t.findFlatByName("searchFragment")
    if t.fragmentNode is not None:
        t.searchFragment = t.fragmentNode.unescaped
    else:
        t.searchFragment = None

    t.anchorLink = t.findFlatByName("anchorLink")
    if t.anchorLink is not None:
        t.anchorLink = t.anchorLink.getString()
773
774
775
776
777
def actionExtractableWikiWord(s, l, st, t):
    """Store the plain string of the "word" child (or None) in t.wikiWord."""
    wordNode = t.findFlatByName("word")
    t.wikiWord = None if wordNode is None else wordNode.getString()
782
783
784
def actionUrlLink(s, l, st, t):
    """Normalize a parsed URL link token (bare or bracketed) to "urlLink"."""
    t.bracketed = t.name != "urlLinkBare"

    t.name = "urlLink"
    t.appendixNode = t.findFlatByName("urlModeAppendix")
    t.coreNode = t.findFlatByName("url")

    # Valid URL but may differ from original input (spaces get quoted)
    t.url = urlQuoteSpecific(t.coreNode.getString(), ' ')
    t.titleNode = t.findFlatByName("title")
799
800
def actionAnchorDef(s, l, st, t):
    """Store the anchor name string on the token."""
    t.anchorLink = t.findFlatByName("anchor").getString()
803
804
# Search fragment after the closing bracket: "#..." with "##" escaping
searchFragmentExtern = buildRegex(ur"#") + \
        buildRegex(ur"(?:(?:#.)|[^ \t\n#])+", "searchFragment")\
        .setParseAction(actionSearchFragmentExtern)

# Search fragment inside the brackets: backslash escaping, stops at '|' or ']'
searchFragmentIntern = buildRegex(ur"#") + buildRegex(ur"(?:(?:" + EscapePlainCharPAT +
        ur".)|(?!" + WikiWordTitleStartPAT +
        ur"|" +  BracketEndPAT + ur").)+", "searchFragment")\
        .setParseAction(actionSearchFragmentIntern)

wikiWordAnchorLink = buildRegex(WikiWordAnchorStartPAT) + \
        buildRegex(ur"[A-Za-z0-9\_]+", "anchorLink")


title = buildRegex(WikiWordTitleStartPAT + ur"[ \t]*") + titleContent    # content.setResultsName("title")


wikiWordNccCore = buildRegex(WikiWordNccPAT, "word")

# Bracketed wiki word: [word#fragment!anchor|title]#externFragment/!anchor
wikiWordNcc = bracketStart + \
        wikiWordNccCore.copy().addParseAction(actionCutRightWhitespace) + \
        Optional(MatchFirst([searchFragmentIntern, wikiWordAnchorLink])) + whitespace + \
        Optional(title) + bracketEnd + \
        Optional(MatchFirst([searchFragmentExtern, wikiWordAnchorLink]))

wikiWordNcc = wikiWordNcc.setResultsNameNoCopy("wikiWord").setName("wikiWordNcc")\
        .setParseAction(actionWikiWordNcc)


# "anchor: name" alone on a line defines a jump target
anchorDef = buildRegex(ur"^[ \t]*anchor:[ \t]*") + buildRegex(ur"[A-Za-z0-9\_]+",
        "anchor") + buildRegex(ur"\n")
anchorDef = anchorDef.setResultsNameNoCopy("anchorDef").setParseAction(actionAnchorDef)


AnchorRE = re.compile(ur"^[ \t]*anchor:[ \t]*(?P<anchorValue>[A-Za-z0-9\_]+)\n",
        re.DOTALL | re.UNICODE | re.MULTILINE)



urlModeAppendix = modeAppendix.setResultsName("urlModeAppendix")

# URL optionally followed by ">appendix"
urlWithAppend = buildRegex(UrlPAT, "url") + Optional(buildRegex(ur">") + \
        urlModeAppendix)

urlWithAppendInBrackets = buildRegex(UrlInBracketsPAT, "url") + Optional(buildRegex(ur">") + \
        urlModeAppendix)


urlBare = urlWithAppend.setResultsName("urlLinkBare")
urlBare = urlBare.setParseAction(actionUrlLink)

urlTitled = bracketStart + urlWithAppendInBrackets + whitespace + \
        Optional(title) + bracketEnd
# urlTitled = bracketStart + urlWithAppend + whitespace + \
#         Optional(title) + bracketEnd
urlTitled = urlTitled.setResultsNameNoCopy("urlLinkBracketed").setParseAction(actionUrlLink)



urlRef = urlTitled | urlBare
864
865
# TODO anchor/fragment
# CamelCase word not preceded by '~' (the escape for CamelCase links)
wikiWordCc = buildRegex(ur"\b(?<!~)" + WikiWordCcPAT + ur"\b", "word") + \
        Optional(MatchFirst([searchFragmentExtern, wikiWordAnchorLink])) # Group( )
wikiWordCc = wikiWordCc.setResultsNameNoCopy("wikiWord").setName("wikiWordCc")\
        .setParseStartAction(preActCheckWikiWordCcAllowed)\
        .setParseAction(actionWikiWordCc)

wikiWord = wikiWordNcc | wikiWordCc



# Needed for _TheHelper.extractWikiWordFromLink()

extractableWikiWord = (wikiWordNccCore | wikiWordNcc) + stringEnd
extractableWikiWord = extractableWikiWord.setResultsNameNoCopy("extractableWikiWord")\
        .setParseAction(actionExtractableWikiWord).optimize(("regexcombine",))\
        .parseWithTabs()


# Full-string matchers for page names and CamelCase words
wikiPageNameRE = re.compile(ur"^" + WikiPageNamePAT + ur"$",
        re.DOTALL | re.UNICODE | re.MULTILINE)


wikiWordCcRE = re.compile(ur"^" + WikiWordCcPAT + ur"$",
        re.DOTALL | re.UNICODE | re.MULTILINE)
891
def isCcWikiWord(word):
    """Return True iff *word* as a whole is a single CamelCase wiki word."""
    return wikiWordCcRE.match(word) is not None
894
895
# Full-string matcher for the core of a non-CamelCase link
wikiLinkCoreRE = re.compile(ur"^" + WikiWordNccPAT + ur"$",
        re.DOTALL | re.UNICODE | re.MULTILINE)



# -------------------- Footnotes --------------------

# A footnote id is purely numeric, e.g. "[42]"
footnotePAT = ur"[0-9]+"
904
def preActCheckFootnotesAllowed(s, l, st, pe):
    """Fail when footnotes are configured to be treated as wiki words.

    In that mode "[42]" must parse as a wiki word link, so the footnote
    element may not match.
    """
    wikiFormatDetails = st.dictStack["wikiFormatDetails"]

    if wikiFormatDetails.wikiLanguageDetails.footnotesAsWws:
        # Fixed copy-paste error: message wrongly talked about CamelCase words
        raise ParseException(s, l, "Footnotes not allowed here")
910
911
def actionFootnote(s, l, st, t):
    """Store the footnote's numeric id string on the token."""
    t.footnoteId = t.findFlatByName("footnoteId").getString()
914
915
# "[42]" — only active when footnotes are not treated as wiki words
footnote = bracketStart + buildRegex(footnotePAT, "footnoteId") + bracketEnd
footnote = footnote.setResultsNameNoCopy("footnote")\
        .setParseStartAction(preActCheckFootnotesAllowed)\
        .setParseAction(actionFootnote)


footnoteRE = re.compile(ur"^" + footnotePAT + ur"$",
        re.DOTALL | re.UNICODE | re.MULTILINE)
924
925
926# -------------------- Attributes (=properties) and insertions --------------------
927
928
def actionAttrInsValueQuoteStart(s, l, st, t):
    # Remember the exact opening quote run so the end quote must match it
    st.dictStack.getSubTopDict()["attrInsValueQuote"] = t[0].text
931
def actionAttrInsValueQuoteEnd(s, l, st, t):
    # Closing quote run must be identical to the remembered opening one
    if t[0].text != st.dictStack.getSubTopDict().get("attrInsValueQuote"):
        raise ParseException(s, l, "End quote of attribute/insertion does not match start")
935
936
def pseudoActionAttrInsQuotedValue(s, l, st, t):
    """Rename a nonzero-length quoted value to "value"; drop empty ones."""
    if t.strLength != 0:
        t.name = "value"
        return t
    return []
942
943
def actionAttribute(s, l, st, t):
    """Collect the attribute key and all (key, value) pairs into t.attrs."""
    key = t.findFlatByName("key").getString()
    t.key = key
    t.keyComponents = key.split(u".")
    t.attrs = [(key, valueNode.getString())
            for valueNode in t.iterFlatByName("value")]
949
950
def actionInsertion(s, l, st, t):
    """Record insertion key, primary value and the remaining appendices."""
    t.key = t.findFlatByName("key").getString()
    t.keyComponents = t.key.split(u".")
    values = [node.getString() for node in t.iterFlatByName("value")]
    t.value = values[0]
    t.appendices = values[1:]
958
959
960
# A quote is a run of one quote character; start and end runs must match
attrInsQuote = buildRegex(ur"\"+|'+|/+|\\+")
attrInsQuoteStart = attrInsQuote.copy()\
        .setParseAction(actionAttrInsValueQuoteStart)
attrInsQuoteEnd = attrInsQuote.copy()\
        .setParseAction(actionAttrInsValueQuoteEnd)

# Everything up to the matching end quote becomes the value
attrInsQuotedValue = FindFirst([], attrInsQuoteEnd)\
        .setPseudoParseAction(pseudoActionAttrInsQuotedValue)

# attrInsNonQuotedValue = buildRegex(ur"[\w\-\_ \t:,.!?#%|/]*", "value")
attrInsNonQuotedValue = buildRegex(ur"(?:[ \t]*[\w\-\_:,.!?#%|/]+)*", "value")


attrInsValue = whitespace + ((attrInsQuoteStart + attrInsQuotedValue + \
        attrInsQuoteEnd) | attrInsNonQuotedValue)

attrInsKey = buildRegex(ur"[\w\-\_\.]+", "key")

# "[key: value; value2]" or "[key=value]"
attribute = bracketStart + whitespace + attrInsKey + \
        buildRegex(ur"[ \t]*[=:]") + attrInsValue + \
        ZeroOrMore(buildRegex(ur";") + attrInsValue) + whitespace + bracketEnd
attribute = attribute.setResultsNameNoCopy("attribute").setParseAction(actionAttribute)


# "[:key: value; appendix...]"
insertion = bracketStart + buildRegex(ur":") + whitespace + attrInsKey + \
        buildRegex(ur"[ \t]*[=:]") + attrInsValue + \
        ZeroOrMore(buildRegex(ur";") + attrInsValue) + whitespace + bracketEnd
insertion = insertion.setResultsNameNoCopy("insertion").setParseAction(actionInsertion)
989
990
991
992# -------------------- Additional regexes to provide --------------------
993
994
# Needed for auto-bullet/auto-unbullet functionality of editor
BulletRE        = re.compile(ur"^(?P<indentBullet>[ \t]*)(?P<actualBullet>\*[ \t])",
        re.DOTALL | re.UNICODE | re.MULTILINE)
NumericSimpleBulletRE = re.compile(ur"^(?P<indentBullet>[ \t]*)(?P<actualBullet>#[ \t])",
        re.DOTALL | re.UNICODE | re.MULTILINE)
# Numbered bullet like "1.2.3. "; last number is captured separately
NumericBulletRE = re.compile(ur"^(?P<indentNumeric>[ \t]*)(?P<preLastNumeric>(?:\d+\.)*)(\d+)\.[ \t]",
        re.DOTALL | re.UNICODE | re.MULTILINE)


# Needed for handleRewrapText
EmptyLineRE     = re.compile(ur"^[ \t\r\n]*$",
        re.DOTALL | re.UNICODE | re.MULTILINE)
1007
1008
1009
1010
# Reverse REs for autocompletion.
# These are matched against the REVERSED text left of the cursor, so the
# patterns are written back-to-front (e.g. "odot" below is "todo" reversed).
revSingleWikiWord    =       (ur"(?:[" +
                             LETTERS + string.digits +
                             ur"]*[" +
                             UPPERCASE+
                             ur"])")   # Needed for auto-completion

RevWikiWordRE      = re.compile(ur"^" +
                             revSingleWikiWord + ur"(?![\~])\b",
                             re.DOTALL | re.UNICODE | re.MULTILINE)
                             # Needed for auto-completion


RevWikiWordRE2     = re.compile(ur"^" + WikiWordNccRevPAT + BracketStartRevPAT,
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion

RevAttributeValue     = re.compile(
        ur"^([\w\-\_ \t:;,.!?#/|]*?)([ \t]*[=:][ \t]*)([\w\-\_ \t\.]+?)" +
        BracketStartRevPAT,
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion


# Reversed todo keywords: "todo", "done", "wait", "action", "track",
# "issue", "question", "project"
RevTodoKeyRE = re.compile(ur"^(?:[^:\s]{0,40}\.)??"
        ur"(?:odot|enod|tiaw|noitca|kcart|eussi|noitseuq|tcejorp)",
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion

RevTodoValueRE = re.compile(ur"^[^\n:]{0,30}:" + RevTodoKeyRE.pattern[1:],
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion


RevWikiWordAnchorRE = re.compile(ur"^(?P<anchorBegin>[A-Za-z0-9\_]{0,20})" +
        WikiWordAnchorStartPAT + ur"(?P<wikiWord>" + RevWikiWordRE.pattern[1:] + ur")",
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion
       
RevWikiWordAnchorRE2 = re.compile(ur"^(?P<anchorBegin>[A-Za-z0-9\_]{0,20})" +
        WikiWordAnchorStartPAT + BracketEndRevPAT + ur"(?P<wikiWord>" +
        WikiWordNccRevPAT + ur")" + BracketStartRevPAT,
        re.DOTALL | re.UNICODE | re.MULTILINE)  # Needed for auto-completion


# Simple todo RE for autocompletion.
ToDoREWithCapturing = re.compile(ur"^([^:\s]+):[ \t]*(.+?)$",
        re.DOTALL | re.UNICODE | re.MULTILINE)
1054
1055
1056
# For auto-link mode relax: words are split into alphanumeric runs; between
# the runs arbitrary non-alphanumeric characters are allowed when matching.
AutoLinkRelaxSplitRE = re.compile(r"[\W]+", re.IGNORECASE | re.UNICODE)

AutoLinkRelaxJoinPAT = ur"[\W]+"
AutoLinkRelaxJoinFlags = re.IGNORECASE | re.UNICODE
1062
1063
1064
1065# For spell checking
1066TextWordRE = re.compile(ur"(?P<negative>[0-9]+|"+ UrlPAT + u"|\b(?<!~)" +
1067        WikiWordCcPAT + ur"\b)|\b[\w']+",
1068        re.DOTALL | re.UNICODE | re.MULTILINE)
1069
1070
1071
1072
1073
1074# -------------------- End tokens --------------------
1075
1076
# Maps the result name of an open construct to the parser element that
# terminates it; consulted by chooseEndToken() below.
TOKEN_TO_END = {
        "bold": boldEnd,
        "italics": italicsEnd,
        "unorderedList": lessIndentOrEnd,
        "orderedList": lessIndentOrEnd,
        "indentedText": lessIndentOrEnd,
        "wikiWord": bracketEnd,
        "urlLinkBracketed": bracketEnd,
        "table": tableEnd,
        "preHtmlTag": preHtmlEnd,
        "heading": headingEnd,
        "todoEntry": todoEnd,
        "noExportMl": noExportMultipleLinesEnd,
        "noExportSl": noExportSingleLineEnd
    }
1092
1093
def chooseEndToken(s, l, st, pe):
    """
    Return the end token of the innermost construct on the parser's name
    stack that has an entry in TOKEN_TO_END. If no stack entry matches,
    the end of the whole string terminates.
    """
    for tokName in reversed(st.nameStack):
        if tokName in TOKEN_TO_END:
            return TOKEN_TO_END[tokName]

    return stringEnd
1103
1104
# Dynamic end token: chooseEndToken() picks the right alternative at runtime
endToken = Choice([stringEnd]+TOKEN_TO_END.values(), chooseEndToken)

# Inside a table, cell and row separators also terminate the current content
endTokenInTable = endToken | newCell | newRow

# Inside a wiki word title, a newline also terminates
endTokenInTitle = endToken | buildRegex(ur"\n")

# Inside bold/italics, a heading also terminates
endTokenInCharacterAttribution = endToken | heading
1112
1113
1114
1115# -------------------- Content definitions --------------------
1116
1117
# Markup recognized inside a table cell
findMarkupInCell = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine,
        urlRef, insertion, escapedChar, footnote, wikiWord,
        htmlTag, htmlEntity], endTokenInTable)
findMarkupInCell = findMarkupInCell.setPseudoParseAction(pseudoActionFindMarkup)

temp = ZeroOrMore(NotAny(endTokenInTable) + findMarkupInCell)
temp = temp.leaveWhitespace().parseWithTabs()
tableContentInCell << temp



# Markup recognized inside a wiki word title (no nested wikiWord here)
findMarkupInTitle = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine,
        urlRef, insertion, escapedChar, footnote, htmlTag, htmlEntity],
        endTokenInTitle)
findMarkupInTitle = findMarkupInTitle.setPseudoParseAction(pseudoActionFindMarkup)

temp = ZeroOrMore(NotAny(endTokenInTitle) + findMarkupInTitle)
temp = temp.leaveWhitespace().parseWithTabs()
titleContent << temp



# Markup recognized inside a heading
findMarkupInHeading = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine,
        urlRef, insertion, escapedChar, footnote, wikiWord, htmlTag,
        htmlEntity], endToken)
findMarkupInHeading = findMarkupInHeading.setPseudoParseAction(
        pseudoActionFindMarkup)

temp = ZeroOrMore(NotAny(endToken) + findMarkupInHeading)
temp = temp.leaveWhitespace().parseWithTabs()
headingContent << temp



# Markup recognized inside a todo entry; also used as generic one-line content
findMarkupInTodo = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine,
        urlRef, attribute, insertion, escapedChar, footnote, wikiWord,
        htmlTag, htmlEntity], endToken)
findMarkupInTodo = findMarkupInTodo.setPseudoParseAction(
        pseudoActionFindMarkup)

temp = OneOrMore(NotAny(endToken) + findMarkupInTodo)
temp = temp.leaveWhitespace().parseWithTabs()
todoContent << temp
oneLineContent << temp



# Markup recognized inside bold/italics (no nested heading, see end token)
findMarkupInCharacterAttribution = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine, urlRef,
        attribute, insertion, escapedChar, footnote, wikiWord,
        newLinesParagraph, newLineLineBreak, newLineWhitespace,
        todoEntry, anchorDef, preHtmlTag, htmlTag,
        htmlEntity, bulletEntry, unorderedList, numberEntry, orderedList,
        indentedText, table, preBlock, noExportMultipleLines,
        suppressHighlightingMultipleLines, equivalIndentation],
        endTokenInCharacterAttribution)
findMarkupInCharacterAttribution = findMarkupInCharacterAttribution\
        .setPseudoParseAction(pseudoActionFindMarkup)

temp = ZeroOrMore(NotAny(endTokenInCharacterAttribution) +
        findMarkupInCharacterAttribution)
temp = temp.leaveWhitespace().parseWithTabs()
characterAttributionContent << temp



# Markup recognized in general page content (full token set)
findMarkup = FindFirst([bold, italics, noExportSingleLine,
        suppressHighlightingSingleLine, urlRef,
        attribute, insertion, escapedChar, footnote, wikiWord,
        newLinesParagraph, newLineLineBreak, newLineWhitespace, heading,
        todoEntry, anchorDef, preHtmlTag, htmlTag,
        htmlEntity, bulletEntry, unorderedList, numberEntry, orderedList,
        indentedText, table, preBlock, noExportMultipleLines,
        suppressHighlightingMultipleLines,
        script, horizontalLine, equivalIndentation], endToken)
findMarkup = findMarkup.setPseudoParseAction(pseudoActionFindMarkup)


content << ZeroOrMore(NotAny(endToken) + findMarkup)  # .setResultsName("ZeroOrMore")
content = content.leaveWhitespace().setValidateAction(validateNonEmpty).parseWithTabs()



# Top-level grammar: page content followed by end of string
text = content + stringEnd


# Run optimizer

# Separate element for LanguageHelper.parseTodoEntry()
todoAsWhole = todoAsWhole.optimize(("regexcombine",)).parseWithTabs()

# Whole text, optimizes subelements recursively
text = text.optimize(("regexcombine",)).parseWithTabs()
# text = text.parseWithTabs()
1216
1217
1218# text.setDebugRecurs(True)
1219
1220
1221
def _buildBaseDict(wikiDocument=None, formatDetails=None):
    """
    Build the base dictionary handed to the parser. If formatDetails is
    not given it is derived from wikiDocument or, when that is missing
    too, from the user defaults (with wiki-independent language details).
    """
    if formatDetails is None:
        if wikiDocument is not None:
            formatDetails = wikiDocument.getWikiDefaultWikiPageFormatDetails()
        else:
            formatDetails = WikiDocument.getUserDefaultWikiPageFormatDetails()
            formatDetails.setWikiLanguageDetails(WikiLanguageDetails(None, None))

    return {
            "indentInfo": IndentInfo("normal"),
            "wikiFormatDetails": formatDetails,
        }
1233
1234
1235
1236# -------------------- API for plugin WikiParser --------------------
1237# During beta state of the WikidPad version, this API isn't stable yet,
1238# so changes may occur!
1239
1240
class _TheParser(object):
    """
    Parser object handed out as plugin API. All methods are static, the
    object itself carries no state.
    """

    @staticmethod
    def reset():
        """
        Reset possible internal states of a (non-thread-safe) object for
        later reuse.
        """
        pass

    @staticmethod
    def getWikiLanguageName():
        """
        Return the internal name of the wiki language implemented by this
        parser.
        """
        return WIKI_LANGUAGE_NAME



    @staticmethod
    def _postProcessing(intLanguageName, content, formatDetails, pageAst,
            threadstop):
        """
        Do some cleanup after main parsing.
        Not part of public API.

        Currently handles auto-link mode "relax": plainText nodes are
        scanned for known wiki words and split into plainText/wikiWord
        nodes accordingly.
        """
        autoLinkRelaxRE = None
        if formatDetails.autoLinkMode == u"relax":
            # List of (compiled regex, wiki word) pairs; built by
            # _TheHelper.buildAutoLinkRelaxInfo(), longest words first
            relaxList = formatDetails.wikiDocument.getAutoLinkRelaxInfo()

            def recursAutoLink(ast):
                newAstNodes = []
                for node in ast.getChildren():
                    if isinstance(node, NonTerminalNode):
                        # Recurse into non-terminal nodes, keep result subtree
                        newAstNodes.append(recursAutoLink(node))
                        continue
   
                    if node.name == "plainText":
                        text = node.text
                        start = node.pos
                       
                        threadstop.testRunning()
                        while text != u"":
                            # The foundWordText is the text as typed in the page
                            # foundWord is the word as entered in database
                            # These two may differ (esp. in whitespaces)
                            foundPos = len(text)
                            foundWord = None
                            foundWordText = None
                           
                            # Search all regexes for the earliest match
                            for regex, word in relaxList:
                                match = regex.search(text)
                                if match:
                                    pos = match.start(0)
                                    if pos < foundPos:
                                        # Match is earlier than previous
                                        foundPos = pos
                                        foundWord = word
                                        foundWordText = match.group(0)
                                        if pos == 0:
                                            # Can't find a better match -> stop loop
                                            break

                            # Add token for text before found word (if any)
                            preText = text[:foundPos]
                            if preText != u"":
                                newAstNodes.append(buildSyntaxNode(preText,
                                        start, "plainText"))
               
                                start += len(preText)
                                text = text[len(preText):]
                           
                            if foundWord is not None:
                                # Wrap the found word in a wikiWord node
                                wwNode = buildSyntaxNode(
                                        [buildSyntaxNode(foundWordText, start, "word")],
                                        start, "wikiWord")
                                       
                                wwNode.searchFragment = None
                                wwNode.anchorLink = None
                                wwNode.wikiWord = foundWord
                                wwNode.titleNode = buildSyntaxNode(foundWordText, start, "plainText") # None

                                newAstNodes.append(wwNode)

                                # Advance at least one char to guarantee progress
                                inc = max(len(foundWordText), 1)
                                start += inc
                                text = text[inc:]

                        continue

                    newAstNodes.append(node)


                ast.sub = newAstNodes
   
                return ast

            pageAst = recursAutoLink(pageAst)
       
        return pageAst

    @staticmethod
    def parse(intLanguageName, content, formatDetails, threadstop):
        """
        Parse the  content  written in wiki language  intLanguageName  using
        formatDetails  and regularly call  threadstop.testRunning()  to
        raise exception if execution thread is no longer current parsing
        thread.
        """

        if len(content) == 0:
            return buildSyntaxNode([], 0, "text")

        if formatDetails.noFormat:
            # Formatting disabled: whole page becomes one plainText node
            return buildSyntaxNode([buildSyntaxNode(content, 0, "plainText")],
                    0, "text")

        baseDict = _buildBaseDict(formatDetails=formatDetails)

##         _prof.start()
        try:
            t = text.parseString(content, parseAll=True, baseDict=baseDict,
                    threadstop=threadstop)
            t = buildSyntaxNode(t, 0, "text")

            t = _TheParser._postProcessing(intLanguageName, content, formatDetails,
                    t, threadstop)

        finally:
##             _prof.stop()
            pass

        return t

# Singleton instance handed out to the plugin host
THE_PARSER = _TheParser()
1377
1378
1379
1380
1381
class WikiLanguageDetails(object):
    """
    Snapshot of the wiki-language specific options of a document; allows
    checking whether two option sets are equivalent.
    """
    __slots__ = ("__weakref__", "footnotesAsWws", "wikiDocument")

    def __init__(self, wikiDocument, docPage):
        self.wikiDocument = wikiDocument
        if self.wikiDocument is not None:
            # Read option from the wiki's configuration
            self.footnotesAsWws = self.wikiDocument.getWikiConfig().getboolean(
                    "main", "footnotes_as_wikiwords", False)
        else:
            # No document given -> wiki-independent default value
            self.footnotesAsWws = False

    @staticmethod
    def getWikiLanguageName():
        """Return the internal name of the wiki language."""
        return WIKI_LANGUAGE_NAME


    def isEquivTo(self, details):
        """
        Compares with other details object if both are "equivalent"
        """
        if self.getWikiLanguageName() != details.getWikiLanguageName():
            return False

        return self.footnotesAsWws == details.footnotesAsWws
1409
1410
class _WikiLinkPath(object):
    """
    Represents the path part of a wiki link, possibly relative to a base
    page (for subpage support).

    upwardCount semantics (derived from join()/getLinkCore() below):
        -1  absolute path from wiki root ("//..." links, plain page names)
         0  relative, appended to the base path itself ("/..." links, ".")
         n  relative, after removing the last n components of the base path
            (n == 1 is a sibling link; each additional ".." adds one)
    """
    __slots__ = ("upwardCount", "components")
    def __init__(self, link=None, pageName=None, upwardCount=-1,
            components=None):
        # Give either  link  (link core as written in the page) or
        # pageName  (absolute page name), or neither and set the fields
        # directly via upwardCount/components.
        assert (link is None) or (pageName is None)

        if pageName is not None:
            # Handle wiki word as absolute link
            self.upwardCount = -1
            self.components = pageName.split(u"/")
            return

        if link is None:
            if components is None:
                components = []

            self.upwardCount = upwardCount
            self.components = components
            return
       
        if link == u".":
            # Link to self
            self.upwardCount = 0
            self.components = []
            return

        if link.startswith(u"//"):
            # Absolute link
            self.upwardCount = -1
            self.components = link[2:].split(u"/")
            return
       
        if link.startswith(u"/"):
            # Relative link downward from base page
            self.upwardCount = 0
            self.components = link[1:].split(u"/")
            return

        comps = link.split(u"/")

        # Count leading ".." components; the first non-".." component starts
        # the actual path (a plain link without ".." gets upwardCount 1)
        for i in xrange(0, len(comps)):
            if comps[i] != "..":
                self.upwardCount = i + 1
                self.components = comps[i:]
                return
       
        # Link consists only of ".." parts
        self.upwardCount = len(comps)
        self.components = []
       
    def clone(self):
        """Return an independent copy (components list is copied)."""
        result = _WikiLinkPath()
        result.upwardCount = self.upwardCount
        result.components = self.components[:]
       
        return result

    def __repr__(self):
        return "_WikiLinkPath(upwardCount=%i, components=%s)" % \
                (self.upwardCount, repr(self.components))

    def isAbsolute(self):
        """Return True iff this path is absolute (from wiki root)."""
        return self.upwardCount == -1
       
    def join(self, otherPath):
        """
        Modify self in place to be the result of walking  otherPath
        starting from self.
        """
        if otherPath.upwardCount == -1:
            # Other path is absolute -> it replaces self completely
            self.upwardCount = -1
            self.components = otherPath.components[:]
            return
        elif otherPath.upwardCount == 0:
            # Pure downward path -> append (self.upwardCount unchanged)
            self.components = self.components + otherPath.components
        else:
            if otherPath.upwardCount <= len(self.components):
                # Strip the walked-over trailing components, then append
                self.components = self.components[:-otherPath.upwardCount] + \
                        otherPath.components
            else:
                # Going back further than self was deep (eliminating
                # more components than self had)

                if self.upwardCount == -1:
                    # Actually an error (going upward after already reaching root)
                    # TODO: Handle as error?
                    self.components = otherPath.components[:]
                else:
                    # Add up upwardCount of other path after subtracting
                    # number of own components because otherPath walked
                    # over them already
                    self.upwardCount += otherPath.upwardCount - \
                            len(self.components)

                    self.components = otherPath.components[:]


    def getLinkCore(self):
        """
        Return the link core string for this path (inverse of the link
        parsing in __init__).
        """
        comps = u"/".join(self.components)
        if self.upwardCount == -1:
            return u"//" + comps
        elif self.upwardCount == 0:
            return u"/" + comps
        elif self.upwardCount == 1:
            return comps
        else:
            return u"/".join([u".."] * (self.upwardCount - 1)) + u"/" + comps


    def resolveWikiWord(self, basePath):
        """
        Return the absolute page name this path points to, resolved
        against  basePath  if self is relative.
        """
        if self.isAbsolute():
            # Absolute is checked separately so basePath can be None if
            # self is absolute
            return u"/".join(self.components)

        absPath = basePath.joinTo(self)
        return u"/".join(absPath.components)


    def resolvePrefixSilenceAndWikiWordLink(self, basePath):
        """
        If using subpages this is used to resolve a link to the right wiki word
        for autocompletion. It returns a tuple (prefix, silence, pageName).
        Autocompletion now searches for all wiki words starting with pageName. For
        all found items it removes the first  silence  characters, prepends the  prefix
        instead and uses the result as suggestion for autocompletion.
       
        If prefix is None autocompletion is not possible.
        """
        if self.isAbsolute():
            return u"//", 0, self.resolveWikiWord(None)

        assert basePath.isAbsolute()
       
        if len(self.components) == 0:
            # link path only consists of ".." -> autocompletion not possible
            if self.upwardCount == 0:
                return None, None, u"/".join(basePath.components)

            return None, None, u"/".join(basePath.components[:-self.upwardCount])

        if self.upwardCount == 0:
            return u"/", len(basePath.resolveWikiWord(None)) + 1, \
                    u"/".join(basePath.components + self.components)

        # Length of s plus the following slash; 0 for the empty string
        def lenAddOne(s):
            return len(s) + 1 if s != "" else 0

        if self.upwardCount == 1:
            return u"", \
                    lenAddOne(u"/".join(basePath.components[:-1])), \
                    u"/".join(basePath.components[:-1] + self.components)

        return u"/".join([u".."] * (self.upwardCount - 1)) + u"/", \
                lenAddOne(u"/".join(basePath.components[:-self.upwardCount])), \
                u"/".join(basePath.components[:-self.upwardCount] +
                self.components)



    def joinTo(self, otherPath):
        """Like join() but returns a new object, leaving self unchanged."""
        result = self.clone()
        result.join(otherPath)
        return result



    @staticmethod
    def isAbsoluteLinkCore(linkCore):
        """Return True iff  linkCore  denotes an absolute link."""
        return linkCore.startswith(u"//")


    @staticmethod
    def getRelativePathByAbsPaths(targetAbsPath, baseAbsPath,
            downwardOnly=True):
        """
        Create a link to targetAbsPath relative to baseAbsPath.
        If downwardOnly is False, the link may contain parts to go to parents
            or siblings
        in path (in this wiki language, ".." are used for this).
        If downwardOnly is True, the function may return None if a relative
        link can't be constructed.
        """
        assert targetAbsPath.isAbsolute() and baseAbsPath.isAbsolute()

        wordPath = targetAbsPath.components[:]
        baseWordPath = baseAbsPath.components[:]
       
        result = _WikiLinkPath()
       
        if downwardOnly:
            # Target must lie strictly below the base path
            if len(baseWordPath) >= len(wordPath):
                return None
            if baseWordPath != wordPath[:len(baseWordPath)]:
                return None
           
            result.upwardCount = 0
            result.components = wordPath[len(baseWordPath):]
            return result
        # TODO test downwardOnly == False
        else:
            # Remove common path elements
            while len(wordPath) > 0 and len(baseWordPath) > 0 and \
                    wordPath[0] == baseWordPath[0]:
                del wordPath[0]
                del baseWordPath[0]
           
            if len(baseWordPath) == 0:
                if len(wordPath) == 0:
                    return None  # word == baseWord, TODO return u"." or something

                result.upwardCount = 0
                result.components = wordPath
                return result

            result.upwardCount = len(baseWordPath)
            result.components = wordPath
            return result
1622       
1623
1624
1625
1626
1627
1628
# Matches the leading whitespace (indentation) of a line
_RE_LINE_INDENT = re.compile(ur"^[ \t]*")
1630
1631class _TheHelper(object):
    @staticmethod
    def reset():
        """
        Reset possible internal states of a (non-thread-safe) object for
        later reuse. This helper is stateless, so nothing to do.
        """
        pass
1635
    @staticmethod
    def getWikiLanguageName():
        """
        Return the internal name of the wiki language supported by this
        helper.
        """
        return WIKI_LANGUAGE_NAME
1639
1640
1641    # TODO More descriptive error messages (which character(s) is/are wrong?)
1642    @staticmethod   # isValidWikiWord
1643    def checkForInvalidWikiWord(word, wikiDocument=None, settings=None):
1644        """
1645        Test if word is syntactically a valid wiki word and no settings
1646        are against it. The camelCase black list is not checked.
1647        The function returns None IFF THE WORD IS VALID, an error string
1648        otherwise
1649        """
1650        if settings is not None and settings.has_key("footnotesAsWws"):
1651            footnotesAsWws = settings["footnotesAsWws"]
1652        else:
1653            if wikiDocument is None:
1654                footnotesAsWws = False
1655            else:
1656                footnotesAsWws = wikiDocument.getWikiConfig().getboolean(
1657                        "main", "footnotes_as_wikiwords", False)
1658
1659        if not footnotesAsWws and footnoteRE.match(word):
1660            return _(u"This is a footnote")
1661
1662        if wikiPageNameRE.match(word):
1663            return None
1664        else:
1665            return _(u"This is syntactically not a wiki word")
1666
1667
1668    # TODO More descriptive error messages (which character(s) is/are wrong?)
1669    @staticmethod   # isValidWikiWord
1670    def checkForInvalidWikiLink(word, wikiDocument=None, settings=None):
1671        """
1672        Test if word is syntactically a valid wiki word and no settings
1673        are against it. The camelCase black list is not checked.
1674        The function returns None IFF THE WORD IS VALID, an error string
1675        otherwise
1676        """
1677        if settings is not None and settings.has_key("footnotesAsWws"):
1678            footnotesAsWws = settings["footnotesAsWws"]
1679        else:
1680            if wikiDocument is None:
1681                footnotesAsWws = False
1682            else:
1683                footnotesAsWws = wikiDocument.getWikiConfig().getboolean(
1684                        "main", "footnotes_as_wikiwords", False)
1685
1686        if not footnotesAsWws and footnoteRE.match(word):
1687            return _(u"This is a footnote")
1688
1689        if wikiLinkCoreRE.match(word):
1690            return None
1691        else:
1692            return _(u"This is syntactically not a wiki word")
1693
1694
1695    @staticmethod
1696    def extractWikiWordFromLink(word, wikiDocument=None, basePage=None):  # TODO Problems with subpages?
1697        """
1698        Strip brackets and other link details if present and return wikiWord
1699        if a valid wiki word can be extracted, None otherwise.
1700        """
1701        if wikiDocument is None and basePage is not None:
1702            wikiDocument = basePage.getWikiDocument()
1703
1704        if basePage is None:
1705            baseDict = _buildBaseDict(wikiDocument=wikiDocument)
1706        else:
1707            baseDict = _buildBaseDict(formatDetails=basePage.getFormatDetails())
1708
1709        try:
1710            t = extractableWikiWord.parseString(word, parseAll=True,
1711                    baseDict=baseDict)
1712            t = t[0]
1713            return t.wikiWord
1714        except ParseException:
1715            return None
1716
1717
    # Expose the module-level resolveWikiWordLink function (presumably
    # defined earlier in this file -- not visible here) as a static method.
    resolveWikiWordLink = staticmethod(resolveWikiWordLink)
    # NOTE(review): the bare string below is a no-op statement, NOT attached
    # to the staticmethod above as its docstring; it is informal documentation.
    """
    If using subpages this is used to resolve a link to the right wiki word
    relative to basePage on which the link is placed.
    It returns the absolute link (page name).
    """
1724
1725
    @staticmethod
    def resolvePrefixSilenceAndWikiWordLink(link, basePage):
        """
        If using subpages this is used to resolve a link to the right wiki word
        for autocompletion. It returns a tuple (prefix, silence, pageName).
        Autocompletion now searches for all wiki words starting with pageName. For
        all found items it removes the first  silence  characters, prepends the  prefix
        instead and uses the result as suggestion for autocompletion.
       
        If prefix is None autocompletion is not possible.
        """
        linkPath = _WikiLinkPath(link=link)
        if linkPath.isAbsolute():
            # Absolute links need no base page to resolve
            return linkPath.resolvePrefixSilenceAndWikiWordLink(None)

        if basePage is None:
            return u"", 0, link  # TODO:  Better reaction?
       
        basePageName = basePage.getWikiWord()
        if basePageName is None:
            return u"", 0, link  # TODO:  Better reaction?
       
        return linkPath.resolvePrefixSilenceAndWikiWordLink(_WikiLinkPath(
                pageName=basePageName))
1750
1751
1752
1753    @staticmethod
1754    def parseTodoValue(todoValue, wikiDocument=None):
1755        """
1756        Parse a todo value (right of the colon) and return the node or
1757        return None if value couldn't be parsed
1758        """
1759        baseDict = _buildBaseDict(wikiDocument=wikiDocument)
1760        try:
1761            t = todoContent.parseString(todoValue, parseAll=True,
1762                    baseDict=baseDict)
1763            return t[0]
1764        except:
1765            return None
1766
1767
1768    @staticmethod
1769    def parseTodoEntry(entry, wikiDocument=None):
1770        """
1771        Parse a complete todo entry (without end-token) and return the node or
1772        return None if value couldn't be parsed
1773        """
1774        baseDict = _buildBaseDict(wikiDocument=wikiDocument)
1775        try:
1776            t = todoAsWhole.parseString(entry, parseAll=True,
1777                    baseDict=baseDict)
1778            return t[0]
1779        except:
1780            traceback.print_exc()
1781            return None
1782
1783
1784    @staticmethod
1785    def _createAutoLinkRelaxWordEntryRE(word):
1786        """
1787        Get compiled regular expression for one word in autoLink "relax"
1788        mode.
1789
1790        Not part of public API.
1791        """
1792        # Split into parts of contiguous alphanumeric characters
1793        parts = AutoLinkRelaxSplitRE.split(word)
1794        # Filter empty parts
1795        parts = [p for p in parts if p != u""]
1796
1797        # Instead of original non-alphanum characters allow arbitrary
1798        # non-alphanum characters
1799        pat = ur"\b" + (AutoLinkRelaxJoinPAT.join(parts)) + ur"\b"
1800        regex = re.compile(pat, AutoLinkRelaxJoinFlags)
1801
1802        return regex
1803
1804
1805    @staticmethod
1806    def buildAutoLinkRelaxInfo(wikiDocument):
1807        """
1808        Build some cache info needed to process auto-links in "relax" mode.
1809        This info will be given back in the formatDetails when calling
1810        _TheParser.parse().
1811        The implementation for this plugin creates a list of regular
1812        expressions and the related wiki words, but this is not mandatory.
1813        """
1814        # Build up regular expression
1815        # First fetch all wiki words
1816        words = wikiDocument.getWikiData().getAllProducedWikiLinks()
1817
1818        # Sort longest words first
1819        words.sort(key=lambda w: len(w), reverse=True)
1820       
1821        return [(_TheHelper._createAutoLinkRelaxWordEntryRE(w), w)
1822                for w in words if w != u""]
1823
1824
    @staticmethod
    def createWikiLinkPathObject(*args, **kwargs):
        # Thin factory: exposes this language's link path implementation
        # (_WikiLinkPath) through the language helper API.
        return _WikiLinkPath(*args, **kwargs)
1828
1829
    @staticmethod
    def isAbsoluteLinkCore(linkCore):
        # Delegate the absoluteness check for a link core string to
        # _WikiLinkPath.
        return _WikiLinkPath.isAbsoluteLinkCore(linkCore)
1833
1834
1835    @staticmethod
1836    def createLinkFromWikiWord(word, wikiPage, forceAbsolute=False):
1837        """
1838        Create a link from word which should be put on wikiPage.
1839        """
1840        wikiDocument = wikiPage.getWikiDocument()
1841       
1842        targetPath = _WikiLinkPath(pageName=word)
1843
1844        if forceAbsolute:
1845            return BracketStart + targetPath.getLinkCore() + BracketEnd
1846
1847        linkCore = _TheHelper.createRelativeLinkFromWikiWord(
1848                word, wikiPage.getWikiWord(), downwardOnly=False)
1849               
1850        if _TheHelper.isCcWikiWord(word) and _TheHelper.isCcWikiWord(linkCore):
1851            wikiFormatDetails = wikiPage.getFormatDetails()
1852            if wikiFormatDetails.withCamelCase:
1853               
1854                ccBlacklist = wikiDocument.getCcWordBlacklist()
1855                if not word in ccBlacklist:
1856                    return linkCore
1857       
1858        return BracketStart + linkCore + BracketEnd
1859
1860
1861    @staticmethod
1862    def createAbsoluteLinksFromWikiWords(words, wikiPage=None):
1863        """
1864        Create particularly stable links from a list of words which should be
1865        put on wikiPage.
1866        """
1867        return u"\n".join([u"%s//%s%s" % (BracketStart, w, BracketEnd)
1868                for w in words])
1869               
1870#     # For compatibility. TODO: Remove
1871#     createStableLinksFromWikiWords = createAbsoluteLinksFromWikiWords
1872
1873    @staticmethod
1874    def createRelativeLinkFromWikiWord(word, baseWord, downwardOnly=True):
1875        """
1876        Create a link to wikiword word relative to baseWord.
1877        If downwardOnly is False, the link may contain parts to go to parents
1878            or siblings
1879        in path (in this wiki language, ".." are used for this).
1880        If downwardOnly is True, the function may return None if a relative
1881        link can't be constructed.
1882        """
1883       
1884        relPath = _WikiLinkPath.getRelativePathByAbsPaths(_WikiLinkPath(
1885                pageName=word), _WikiLinkPath(pageName=baseWord),
1886                downwardOnly=downwardOnly)
1887       
1888        if relPath is None:
1889            return None
1890       
1891        return relPath.getLinkCore()
1892
1893    @staticmethod
1894    def createUrlLinkFromPath(wikiDocument, path, relative=False,
1895            bracketed=False, protocol=None):
1896        if bracketed:
1897            addSafe = ' '
1898        else:
1899            addSafe = ''
1900
1901        if relative:
1902            url = wikiDocument.makeAbsPathRelUrl(path, addSafe=addSafe)
1903
1904            if url is None:
1905                # Relative not possible -> absolute instead
1906                relative = False
1907            else:
1908                if protocol == "wiki":
1909                    url = u"wiki" + url  # Combines to "wikirel://"
1910
1911        if not relative:
1912            if protocol == "wiki":
1913                url = u"wiki:" + urlFromPathname(path, addSafe=addSafe)
1914            else:
1915                url = u"file:" + urlFromPathname(path, addSafe=addSafe)
1916
1917        if bracketed:
1918            url = BracketStart + url + BracketEnd
1919       
1920        return url
1921
1922
1923    @staticmethod
1924    def createAttributeFromComponents(key, value, wikiPage=None):
1925        """
1926        Build an attribute from key and value.
1927        TODO: Check for necessary escaping
1928        """
1929        return u"%s%s: %s%s\n" % (BracketStart, key, value, BracketEnd)
1930
1931
    @staticmethod
    def isCcWikiWord(word):
        # True iff word is accepted by the CamelCase wiki word regex
        # (match anchored at the start of the string).
        return bool(wikiWordCcRE.match(word))
1935
1936
1937    @staticmethod
1938    def findNextWordForSpellcheck(text, startPos, wikiPage):
1939        """
1940        Find in text next word to spellcheck, beginning at position startPos
1941       
1942        Returns tuple (start, end, spWord) which is either (None, None, None)
1943        if no more word can be found or returns start and after-end of the
1944        spWord to spellcheck.
1945       
1946        TODO: Move away because this is specific to human language,
1947            not wiki language.
1948        """
1949        while True:
1950            mat = TextWordRE.search(text, startPos)
1951            if mat is None:
1952                # No further word
1953                return (None, None, None)
1954
1955            if mat.group("negative") is not None:
1956                startPos = mat.end()
1957                continue
1958
1959            start, end = mat.span()
1960            spWord = mat.group()
1961
1962            return (start, end, spWord)
1963
1964
    @staticmethod
    def prepareAutoComplete(editor, text, charPos, lineStartCharPos,
            wikiDocument, docPage, settings):
        """
        Called when user wants autocompletion.
        text -- Whole text of page
        charPos -- Cursor position in characters
        lineStartCharPos -- For convenience and speed, position of the
                start of text line in which cursor is.
        wikiDocument -- wiki document object
        docPage -- DocPage object on which autocompletion is done
        settings -- dict; keys "closingBracket" and "builtinAttribs"
                are read (both default False)

        returns -- a list of tuples (sortKey, entry, backStepChars) where
            sortKey -- unistring to use for sorting entries alphabetically
                using right collator
            entry -- actual unistring entry to show and to insert if
                selected
            backStepChars -- numbers of chars to delete to the left of cursor
                before inserting entry
        """
        line = text[lineStartCharPos:charPos]
        # The text left of the cursor is matched backwards, so all the
        # Rev* regexes below work on the reversed line
        rline = revStr(line)
        # Maps suggested entry -> chars to delete left of cursor
        backStepMap = {}
        closingBracket = settings.get("closingBracket", False)
        builtinAttribs = settings.get("builtinAttribs", False)

        # TODO Sort entries appropriately (whatever this means)

        wikiData = wikiDocument.getWikiData()
        baseWordSegments = docPage.getWikiWord().split(u"/")

        mat1 = RevWikiWordRE.match(rline)
        if mat1:
            # may be CamelCase word
            tofind = line[-mat1.end():]
            backstep = len(tofind)
            prefix, silence, tofind = _TheHelper.resolvePrefixSilenceAndWikiWordLink(
                    tofind, docPage)
           
            # We don't want prefixes here
            if prefix == u"":
                ccBlacklist = wikiDocument.getCcWordBlacklist()
                for word in wikiData.getWikiPageLinkTermsStartingWith(tofind, True):
                    # Only suggest genuine CamelCase words that aren't
                    # blacklisted
                    if not _TheHelper.isCcWikiWord(word[silence:]) or word in ccBlacklist:
                        continue

                    backStepMap[word[silence:]] = backstep

        mat2 = RevWikiWordRE2.match(rline)
        mat3 = RevAttributeValue.match(rline)
        if mat2:
            # may be not-CamelCase word or in an attribute name
            tofind = line[-mat2.end():]

            # Should a closing bracket be appended to suggested words?
            if closingBracket:
                wordBracketEnd = BracketEnd
            else:
                wordBracketEnd = u""
           
            backstep = len(tofind)

            prefix, silence, link = _TheHelper.resolvePrefixSilenceAndWikiWordLink(
                    tofind[len(BracketStart):], docPage)
           
            if prefix is not None:
                for word in wikiData.getWikiPageLinkTermsStartingWith(
                        link, True):
                    backStepMap[BracketStart + prefix + word[silence:] +
                            wordBracketEnd] = backstep

            # The same text may also start an attribute name
            for prop in wikiDocument.getAttributeNamesStartingWith(
                    tofind[len(BracketStart):], builtinAttribs):
                backStepMap[BracketStart + prop] = backstep
        elif mat3:
            # In an attribute value
            tofind = line[-mat3.end():]
            # The groups were matched on the reversed line, so reverse
            # them back before use
            propkey = revStr(mat3.group(3))
            propfill = revStr(mat3.group(2))
            propvalpart = revStr(mat3.group(1))
            values = filter(lambda pv: pv.startswith(propvalpart),
                    wikiDocument.getDistinctAttributeValuesByKey(propkey,
                    builtinAttribs))

            for v in values:
                backStepMap[BracketStart + propkey +
                        propfill + v + BracketEnd] = len(tofind)

        mat = RevTodoKeyRE.match(rline)
        if mat:
            # Might be todo entry
            tofind = line[-mat.end():]
            for t in wikiData.getTodos():
                td = t[1]
                if not td.startswith(tofind):
                    continue

#                 tdmat = ToDoREWithCapturing.match(td)
#                 key = tdmat.group(1) + u":"
                key = td + u":"
                backStepMap[key] = len(tofind)

        mat = RevTodoValueRE.match(rline)
        if mat:
            # Might be todo entry
            tofind = line[-mat.end():]
            combinedTodos = [t[1] + ":" + t[2] for t in wikiData.getTodos()]
#             todos = [t[1] for t in wikiData.getTodos() if t[1].startswith(tofind)]
            todos = [t for t in combinedTodos if t.startswith(tofind)]
            for t in todos:
                backStepMap[t] = len(tofind)

        mat = RevWikiWordAnchorRE2.match(rline)
        if mat:
            # In an anchor of a possible bracketed wiki word
            tofind = line[-mat.end():]
            wikiLinkCore = revStr(mat.group("wikiWord"))
            wikiWord = _TheHelper.resolvePrefixSilenceAndWikiWordLink(
                    wikiLinkCore, docPage)[2]

            anchorBegin = revStr(mat.group("anchorBegin"))

            try:
                page = wikiDocument.getWikiPage(wikiWord) # May throw exception
                anchors = [a for a in page.getAnchors()
                        if a.startswith(anchorBegin)]

                for a in anchors:
                    backStepMap[BracketStart + wikiLinkCore +
                            BracketEnd +
                            WikiWordAnchorStart + a] = len(tofind)
            except WikiWordNotFoundException:
                # wikiWord isn't a wiki word
                pass

        mat = RevWikiWordAnchorRE.match(rline)
        if mat:
            # In an anchor of a possible camel case word
            tofind = line[-mat.end():]
            wikiLinkCore = revStr(mat.group("wikiWord"))
            wikiWord = _TheHelper.resolvePrefixSilenceAndWikiWordLink(
                    wikiLinkCore, docPage)[2]

            anchorBegin = revStr(mat.group("anchorBegin"))

            try:
                page = wikiDocument.getWikiPage(wikiWord) # May throw exception
                anchors = [a for a in page.getAnchors()
                        if a.startswith(anchorBegin)]
                       
                for a in anchors:
                    backStepMap[wikiWord + wikiLinkCore +
                            a] = len(tofind)
            except WikiWordNotFoundException:
                # wikiWord isn't a wiki word
                pass


        acresult = backStepMap.keys()
       
        if len(acresult) > 0:
            # formatting.BracketEnd
            # Use the entry without a trailing closing bracket as sort
            # key so bracketed and unbracketed entries sort together
            acresultTuples = []
            for r in acresult:
                if r.endswith(BracketEnd):
                    rc = r[: -len(BracketEnd)]
                else:
                    rc = r
                acresultTuples.append((rc, r, backStepMap[r]))

            return acresultTuples
        else:
            return []
2140
2141
2142    @staticmethod
2143    def handleNewLineBeforeEditor(editor, text, charPos, lineStartCharPos,
2144            wikiDocument, settings):
2145        """
2146        Processes pressing of a newline in editor before editor processes it.
2147        Returns True iff the actual newline should be processed by
2148            editor yet.
2149        """
2150        # autoIndent, autoBullet, autoUnbullet
2151       
2152        line = text[lineStartCharPos:charPos]
2153
2154        if settings.get("autoUnbullet", False):
2155            # Check for lonely bullet or number
2156            mat = BulletRE.match(line)
2157            if mat and mat.end(0) == len(line):
2158                editor.SetSelectionByCharPos(lineStartCharPos, charPos)
2159                editor.ReplaceSelection(mat.group("indentBullet"))
2160                return False
2161
2162            mat = NumericSimpleBulletRE.match(line)
2163            if mat and mat.end(0) == len(line):
2164                editor.SetSelectionByCharPos(lineStartCharPos, charPos)
2165                editor.ReplaceSelection(mat.group("indentBullet"))
2166                return False
2167
2168            mat = NumericBulletRE.match(line)
2169            if mat and mat.end(0) == len(line):
2170                replacement = mat.group("indentNumeric")
2171                if mat.group("preLastNumeric") != u"":
2172                    replacement += mat.group("preLastNumeric") + u" "
2173
2174                editor.SetSelectionByCharPos(lineStartCharPos, charPos)
2175                editor.ReplaceSelection(replacement)
2176                return False
2177       
2178        return True
2179
2180
2181    @staticmethod
2182    def handleNewLineAfterEditor(editor, text, charPos, lineStartCharPos,
2183            wikiDocument, settings):
2184        """
2185        Processes pressing of a newline after editor processed it (if
2186        handleNewLineBeforeEditor returned True).
2187        """
2188        # autoIndent, autoBullet, autoUnbullet
2189
2190        currentLine = editor.GetCurrentLine()
2191
2192        if currentLine > 0:
2193            previousLine = editor.GetLine(currentLine - 1)
2194            indent = _RE_LINE_INDENT.match(previousLine).group(0)
2195   
2196            # check if the prev level was a bullet level
2197            if settings.get("autoBullets", False):
2198                match = BulletRE.match(previousLine)
2199                if match:
2200                    editor.AddText(indent + match.group("actualBullet"))
2201                    return
2202
2203                match = NumericSimpleBulletRE.match(previousLine)
2204                if match:
2205                    editor.AddText(indent + match.group("actualBullet"))
2206                    return False
2207
2208                match = NumericBulletRE.search(previousLine)
2209                if match:
2210                    prevNumStr = match.group(3)
2211                    prevNum = int(prevNumStr)
2212                    nextNum = prevNum+1
2213#                     adjustment = len(str(nextNum)) - len(prevNumStr)
2214#                     if adjustment == 0:
2215                    editor.AddText(u"%s%s%d. " % (indent,
2216                            match.group(2), int(prevNum)+1))
2217#                     else:
2218#                         editor.AddText(u"%s%s%d. " % (u" " *
2219#                                 (editor.GetLineIndentation(currentLine - 1) - adjustment),
2220#                                 match.group(2), int(prevNum)+1))
2221                    return
2222
2223            if settings.get("autoIndent", False):
2224                editor.AddText(indent)
2225                return
2226
2227
    @staticmethod
    def handleRewrapText(editor, settings):
        """
        Rewrap (refill) the paragraph around the cursor to the wrap
        column taken from the "wrap" attribute (default 70, minimum 5).
        A paragraph is delimited by empty lines or bullet lines.
        """
        curPos = editor.GetCurrentPos()

        # search back for start of the para
        curLineNum = editor.GetCurrentLine()
        curLine = editor.GetLine(curLineNum)
        while curLineNum > 0:
            # don't wrap previous bullets with this bullet
            if (BulletRE.match(curLine) or NumericBulletRE.match(curLine)):
                break

            if EmptyLineRE.match(curLine):
                # Empty line ends the paragraph; start is the line below
                curLineNum = curLineNum + 1
                break

            curLineNum = curLineNum - 1
            curLine = editor.GetLine(curLineNum)
        startLine = curLineNum

        # search forward for end of the para
        curLineNum = editor.GetCurrentLine()
        curLine = editor.GetLine(curLineNum)
        while curLineNum <= editor.GetLineCount():
            # don't wrap the next bullet with this bullet
            if curLineNum > startLine:
                if (BulletRE.match(curLine) or NumericBulletRE.match(curLine)):
                    curLineNum = curLineNum - 1
                    break

            if EmptyLineRE.match(curLine):
                curLineNum = curLineNum - 1
                break

            curLineNum = curLineNum + 1
            curLine = editor.GetLine(curLineNum)
        endLine = curLineNum
       
        if (startLine <= endLine):
            # get the start and end of the lines
            startPos = editor.PositionFromLine(startLine)
            endPos = editor.GetLineEndPosition(endLine)

            # get the indentation for rewrapping
            indent = _RE_LINE_INDENT.match(editor.GetLine(startLine)).group(0)
            subIndent = indent

            # if the start of the para is a bullet the subIndent has to change
            # so continuation lines align with the bullet's text
            if BulletRE.match(editor.GetLine(startLine)):
                subIndent = indent + u"  "
            else:
                match = NumericBulletRE.match(editor.GetLine(startLine))
                if match:
                    subIndent = indent + u" " * (len(match.group(2)) + 2)

            # get the text that will be wrapped
            text = editor.GetTextRange(startPos, endPos)
            # remove spaces, newlines, etc
            text = re.sub("[\s\r\n]+", " ", text)

            # wrap the text
            wrapPosition = 70
            try:
                wrapPosition = int(
                        editor.getLoadedDocPage().getAttributeOrGlobal(
                        "wrap", "70"))
            except:
                # Bad/missing "wrap" attribute -> keep default of 70
                pass

            # make the min wrapPosition 5
            if wrapPosition < 5:
                wrapPosition = 5

            filledText = fill(text, width=wrapPosition,
                    initial_indent=indent,
                    subsequent_indent=subIndent)

            # replace the text based on targetting
            editor.SetTargetStart(startPos)
            editor.SetTargetEnd(endPos)
            editor.ReplaceTarget(filledText)
            # Restore the original cursor position
            editor.GotoPos(curPos)
2310
2311
    @staticmethod
    def getNewDefaultWikiSettingsPage(mainControl):
        """
        Return default text of the "WikiSettings" page for a new wiki.
        """
        # Wrapped in _() so the whole template may be localized
        return _(u"""++ Wiki Settings

These are your default global settings.

[global.importance.low.color: grey]
[global.importance.high.bold: true]
[global.contact.icon: contact]
[global.wrap: 70]

[icon: cog]
""")  # TODO Localize differently?
2328
2329
    @staticmethod
    def createWikiLanguageDetails(wikiDocument, docPage):
        """
        Returns a new WikiLanguageDetails object based on current configuration
        """
        return WikiLanguageDetails(wikiDocument, docPage)
2336
2337
2338THE_LANGUAGE_HELPER = _TheHelper()
2339
2340
2341
def describeWikiLanguage(ver, app):
    """
    API function for "WikiParser" plugins
    Returns a sequence of tuples describing the supported
    insertion keys. Each tuple has the form (intLanguageName, hrLanguageName,
            parserFactory, parserIsThreadsafe, editHelperFactory,
            editHelperIsThreadsafe)
    Where the items mean:
        intLanguageName -- internal unique name (should be ascii only) to
            identify wiki language processed by parser
        hrLanguageName -- human readable language name, unistring
            (TODO: localization)
        parserFactory -- factory function to create parser object(s)
            fulfilling the parser API
        parserIsThreadsafe -- boolean if parser is threadsafe. If not this
            will currently lead to a very inefficient operation
        editHelperFactory -- factory for helper object containing further
            functions needed for editing, tree presentation and so on.
        editHelperIsThreadsafe -- boolean if edit helper functions are
            threadsafe.

    Parameters:

    ver -- API version (can only be 1 currently)
    app -- wxApp object
    """

    return ((WIKI_LANGUAGE_NAME, WIKI_HR_LANGUAGE_NAME, parserFactory,
             True, languageHelperFactory, True),)
2371
2372
2373
2374
def parserFactory(intLanguageName, debugMode):
    """
    Builds up a parser object. If the parser is threadsafe this function is
    allowed to return the same object multiple times (currently it should do
    so for efficiency).
    For seldom needed parsers it is recommended to put the actual parser
    construction as singleton in this function to reduce startup time of WikidPad.
    For non-threadsafe parsers it is required to create one inside this
    function at each call.

    intLanguageName -- internal unique name (should be ascii only) to
        identify wiki language to process by parser
    """
    # Propagate debug mode (recursively) to the module-level grammar
    # root "text" only when it actually changed
    if text.getDebug() != debugMode:
        text.setDebugRecurs(debugMode)

    # Parser is threadsafe, so the shared singleton is returned
    return THE_PARSER
2392
2393
def languageHelperFactory(intLanguageName, debugMode):
    """
    Builds up a language helper object. If the object is threadsafe this function is
    allowed to return the same object multiple times (currently it should do
    so for efficiency).

    intLanguageName -- internal unique name (should be ascii only) to
        identify wiki language to process by helper
    """
    # Helper is threadsafe (see describeWikiLanguage()), so the same
    # module-level singleton is always returned
    return THE_LANGUAGE_HELPER
2404
Note: See TracBrowser for help on using the browser.