diff --git a/kernel/parsedownextra.class.php b/kernel/parsedownextra.class.php new file mode 100644 index 00000000..be6966d2 --- /dev/null +++ b/kernel/parsedownextra.class.php @@ -0,0 +1,526 @@ +BlockTypes[':'] []= 'DefinitionList'; + $this->BlockTypes['*'] []= 'Abbreviation'; + + # identify footnote definitions before reference definitions + array_unshift($this->BlockTypes['['], 'Footnote'); + + # identify footnote markers before before links + array_unshift($this->InlineTypes['['], 'FootnoteMarker'); + } + + # + # ~ + + function text($text) + { + $markup = parent::text($text); + + # merge consecutive dl elements + + $markup = preg_replace('/<\/dl>\s+
\s+/', '', $markup); + + # add footnotes + + if (isset($this->DefinitionData['Footnote'])) + { + $Element = $this->buildFootnoteElement(); + + $markup .= "\n" . $this->element($Element); + } + + return $markup; + } + + # + # Blocks + # + + # + # Abbreviation + + protected function blockAbbreviation($Line) + { + if (preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $matches)) + { + $this->DefinitionData['Abbreviation'][$matches[1]] = $matches[2]; + + $Block = array( + 'hidden' => true, + ); + + return $Block; + } + } + + # + # Footnote + + protected function blockFootnote($Line) + { + if (preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $matches)) + { + $Block = array( + 'label' => $matches[1], + 'text' => $matches[2], + 'hidden' => true, + ); + + return $Block; + } + } + + protected function blockFootnoteContinue($Line, $Block) + { + if ($Line['text'][0] === '[' and preg_match('/^\[\^(.+?)\]:/', $Line['text'])) + { + return; + } + + if (isset($Block['interrupted'])) + { + if ($Line['indent'] >= 4) + { + $Block['text'] .= "\n\n" . $Line['text']; + + return $Block; + } + } + else + { + $Block['text'] .= "\n" . $Line['text']; + + return $Block; + } + } + + protected function blockFootnoteComplete($Block) + { + $this->DefinitionData['Footnote'][$Block['label']] = array( + 'text' => $Block['text'], + 'count' => null, + 'number' => null, + ); + + return $Block; + } + + # + # Definition List + + protected function blockDefinitionList($Line, $Block) + { + if ( ! isset($Block) or isset($Block['type'])) + { + return; + } + + $Element = array( + 'name' => 'dl', + 'handler' => 'elements', + 'text' => array(), + ); + + $terms = explode("\n", $Block['element']['text']); + + foreach ($terms as $term) + { + $Element['text'] []= array( + 'name' => 'dt', + 'handler' => 'line', + 'text' => $term, + ); + } + + $Block['element'] = $Element; + + $Block = $this->addDdElement($Line, $Block); + + return $Block; + } + + protected function blockDefinitionListContinue($Line, array $Block) + { + if ($Line['text'][0] === ':') + { + $Block = $this->addDdElement($Line, $Block); + + return $Block; + } + else + { + if (isset($Block['interrupted']) and $Line['indent'] === 0) + { + return; + } + + if (isset($Block['interrupted'])) + { + $Block['dd']['handler'] = 'text'; + $Block['dd']['text'] .= "\n\n"; + + unset($Block['interrupted']); + } + + $text = substr($Line['body'], min($Line['indent'], 4)); + + $Block['dd']['text'] .= "\n" . $text; + + return $Block; + } + } + + # + # Header + + protected function blockHeader($Line) + { + $Block = parent::blockHeader($Line); + + if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) + { + $attributeString = $matches[1][0]; + + $Block['element']['attributes'] = $this->parseAttributeData($attributeString); + + $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]); + } + + return $Block; + } + + # + # Markup + + protected function blockMarkupComplete($Block) + { + if ( ! isset($Block['void'])) + { + $Block['markup'] = $this->processTag($Block['markup']); + } + + return $Block; + } + + # + # Setext + + protected function blockSetextHeader($Line, array $Block = null) + { + $Block = parent::blockSetextHeader($Line, $Block); + + if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) + { + $attributeString = $matches[1][0]; + + $Block['element']['attributes'] = $this->parseAttributeData($attributeString); + + $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]); + } + + return $Block; + } + + # + # Inline Elements + # + + # + # Footnote Marker + + protected function inlineFootnoteMarker($Excerpt) + { + if (preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches)) + { + $name = $matches[1]; + + if ( ! isset($this->DefinitionData['Footnote'][$name])) + { + return; + } + + $this->DefinitionData['Footnote'][$name]['count'] ++; + + if ( ! isset($this->DefinitionData['Footnote'][$name]['number'])) + { + $this->DefinitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; # ยป & + } + + $Element = array( + 'name' => 'sup', + 'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name), + 'handler' => 'element', + 'text' => array( + 'name' => 'a', + 'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'), + 'text' => $this->DefinitionData['Footnote'][$name]['number'], + ), + ); + + return array( + 'extent' => strlen($matches[0]), + 'element' => $Element, + ); + } + } + + private $footnoteCount = 0; + + # + # Link + + protected function inlineLink($Excerpt) + { + $Link = parent::inlineLink($Excerpt); + + $remainder = substr($Excerpt['text'], $Link['extent']); + + if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) + { + $Link['element']['attributes'] += $this->parseAttributeData($matches[1]); + + $Link['extent'] += strlen($matches[0]); + } + + return $Link; + } + + # + # ~ + # + + protected function unmarkedText($text) + { + $text = parent::unmarkedText($text); + + if (isset($this->DefinitionData['Abbreviation'])) + { + foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning) + { + $pattern = '/\b'.preg_quote($abbreviation, '/').'\b/'; + + $text = preg_replace($pattern, ''.$abbreviation.'', $text); + } + } + + return $text; + } + + # + # Util Methods + # + + protected function addDdElement(array $Line, array $Block) + { + $text = substr($Line['text'], 1); + $text = trim($text); + + unset($Block['dd']); + + $Block['dd'] = array( + 'name' => 'dd', + 'handler' => 'line', + 'text' => $text, + ); + + if (isset($Block['interrupted'])) + { + $Block['dd']['handler'] = 'text'; + + unset($Block['interrupted']); + } + + $Block['element']['text'] []= & $Block['dd']; + + return $Block; + } + + protected function buildFootnoteElement() + { + $Element = array( + 'name' => 'div', + 'attributes' => array('class' => 'footnotes'), + 'handler' => 'elements', + 'text' => array( + array( + 'name' => 'hr', + ), + array( + 'name' => 'ol', + 'handler' => 'elements', + 'text' => array(), + ), + ), + ); + + uasort($this->DefinitionData['Footnote'], 'self::sortFootnotes'); + + foreach ($this->DefinitionData['Footnote'] as $definitionId => $DefinitionData) + { + if ( ! isset($DefinitionData['number'])) + { + continue; + } + + $text = $DefinitionData['text']; + + $text = parent::text($text); + + $numbers = range(1, $DefinitionData['count']); + + $backLinksMarkup = ''; + + foreach ($numbers as $number) + { + $backLinksMarkup .= ' '; + } + + $backLinksMarkup = substr($backLinksMarkup, 1); + + if (substr($text, - 4) === '

') + { + $backLinksMarkup = ' '.$backLinksMarkup; + + $text = substr_replace($text, $backLinksMarkup.'

', - 4); + } + else + { + $text .= "\n".'

'.$backLinksMarkup.'

'; + } + + $Element['text'][1]['text'] []= array( + 'name' => 'li', + 'attributes' => array('id' => 'fn:'.$definitionId), + 'text' => "\n".$text."\n", + ); + } + + return $Element; + } + + # ~ + + protected function parseAttributeData($attributeString) + { + $Data = array(); + + $attributes = preg_split('/[ ]+/', $attributeString, - 1, PREG_SPLIT_NO_EMPTY); + + foreach ($attributes as $attribute) + { + if ($attribute[0] === '#') + { + $Data['id'] = substr($attribute, 1); + } + else # "." + { + $classes []= substr($attribute, 1); + } + } + + if (isset($classes)) + { + $Data['class'] = implode(' ', $classes); + } + + return $Data; + } + + # ~ + + protected function processTag($elementMarkup) # recursive + { + # http://stackoverflow.com/q/1148928/200145 + libxml_use_internal_errors(true); + + $DOMDocument = new DOMDocument; + + # http://stackoverflow.com/q/11309194/200145 + $elementMarkup = mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8'); + + # http://stackoverflow.com/q/4879946/200145 + $DOMDocument->loadHTML($elementMarkup); + $DOMDocument->removeChild($DOMDocument->doctype); + $DOMDocument->replaceChild($DOMDocument->firstChild->firstChild->firstChild, $DOMDocument->firstChild); + + $elementText = ''; + + if ($DOMDocument->documentElement->getAttribute('markdown') === '1') + { + foreach ($DOMDocument->documentElement->childNodes as $Node) + { + $elementText .= $DOMDocument->saveHTML($Node); + } + + $DOMDocument->documentElement->removeAttribute('markdown'); + + $elementText = "\n".$this->text($elementText)."\n"; + } + else + { + foreach ($DOMDocument->documentElement->childNodes as $Node) + { + $nodeMarkup = $DOMDocument->saveHTML($Node); + + if ($Node instanceof DOMElement and ! in_array($Node->nodeName, $this->textLevelElements)) + { + $elementText .= $this->processTag($nodeMarkup); + } + else + { + $elementText .= $nodeMarkup; + } + } + } + + # because we don't want for markup to get encoded + $DOMDocument->documentElement->nodeValue = 'placeholder\x1A'; + + $markup = $DOMDocument->saveHTML($DOMDocument->documentElement); + $markup = str_replace('placeholder\x1A', $elementText, $markup); + + return $markup; + } + + # ~ + + protected function sortFootnotes($A, $B) # callback + { + return $A['number'] - $B['number']; + } + + # + # Fields + # + + protected $regexAttribute = '(?:[#.][-\w]+[ ]*)'; +}