390 lines
9.4 KiB
PHP
390 lines
9.4 KiB
PHP
<?php
|
|
/**
 * @copyright Copyright (c) 2014 Carsten Brandt
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */
|
|
|
|
namespace cebe\markdown;
|
|
use ReflectionMethod;
|
|
|
|
/**
 * A generic parser for markdown-like languages.
 *
 * The parser works in two phases: the input is first turned into an abstract
 * syntax tree ("absy", a list of arrays whose element `0` is the node type)
 * and the tree is then rendered by calling `render<Type>()` for every node.
 *
 * Block elements are discovered through protected `identify<Type>()` methods and
 * consumed by the matching `consume<Type>()` method. Inline elements are handled by
 * protected `parse<Name>()` methods that declare their marker in their PHPDoc.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
abstract class Parser
{
    /**
     * @var integer the maximum nesting level for language elements.
     */
    public $maximumNestingLevel = 32;

    /**
     * @var array the current context the parser is in.
     * The element type (or inline handler name) currently being processed is at index 0.
     * TODO remove in favor of absy
     */
    protected $context = [];
    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
    ];

    /**
     * @var integer the current nesting depth of `parseBlocks()`/`parseInline()` calls.
     */
    private $_depth = 0;


    /**
     * Parses the given text considering the full language.
     *
     * This includes parsing block elements as well as inline elements.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parse($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // nothing to parse, but every prepare() must be paired with a cleanup()
            $this->cleanup();
            return '';
        }

        // normalize all line ending styles to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * Parses a paragraph without block elements (block elements are ignored).
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parseParagraph($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // nothing to parse, but every prepare() must be paired with a cleanup()
            $this->cleanup();
            return '';
        }

        // normalize all line ending styles to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
     */
    protected function prepare()
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup()
    {
    }


    // block parsing

    /**
     * @var array|null cached result of `blockTypes()`, `null` until first requested.
     */
    private $_blockTypes;

    /**
     * Returns the block types this parser knows about.
     *
     * The list is detected once from the protected methods whose name starts with
     * `identify`; the remainder of the method name, in lower case, is the block type.
     * The result is sorted and cached for the lifetime of the parser instance.
     *
     * @return array a list of block element types available.
     */
    protected function blockTypes()
    {
        if ($this->_blockTypes === null) {
            // detect block types via "identify" functions
            $reflection = new \ReflectionClass($this);
            $this->_blockTypes = array_filter(array_map(function ($method) {
                $name = $method->getName();
                return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
            }, $reflection->getMethods(\ReflectionMethod::IS_PROTECTED)));

            // sort() also re-indexes the list after array_filter() left gaps
            sort($this->_blockTypes);
        }
        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses the registered block types to
     * detect the type of this line.
     * @param array $lines
     * @param integer $current
     * @return string name of the block type in lower case
     */
    protected function detectLineType($lines, $current)
    {
        $line = $lines[$current];
        $blockTypes = $this->blockTypes();
        foreach ($blockTypes as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }
        // consider the line a normal paragraph if no other block type matches
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * When the maximum nesting level is reached the lines are not parsed but
     * returned as one single text node.
     *
     * @param array $lines the lines to turn into blocks
     * @return array list of block nodes
     */
    protected function parseBlocks($lines)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', implode("\n", $lines)]];
        }
        $this->_depth++;

        $blocks = [];

        // convert lines to blocks
        for ($i = 0, $count = count($lines); $i < $count; $i++) {
            $line = $lines[$i];
            if ($line !== '' && rtrim($line) !== '') { // skip empty lines
                // identify a blocks beginning and parse the content;
                // the consume method reports the last line it used via $i
                list($block, $i) = $this->parseBlock($lines, $i);
                if ($block !== false) {
                    $blocks[] = $block;
                }
            }
        }

        $this->_depth--;

        return $blocks;
    }

    /**
     * Parses the block at current line by identifying the block type and parsing the content
     * @param $lines
     * @param $current
     * @return array Array of two elements, the first element contains the block,
     * the second contains the next line index to be parsed.
     */
    protected function parseBlock($lines, $current)
    {
        // identify block type for this line
        $blockType = $this->detectLineType($lines, $current);

        // call consume method for the detected block type to consume further lines
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a list of nodes by calling `render<Type>()` for each of them,
     * where the type is taken from element `0` of the node.
     *
     * While a node is rendered its type is available as first element of `$context`.
     *
     * @param array $blocks the nodes to render
     * @return string the concatenated output of all render methods
     */
    protected function renderAbsy($blocks)
    {
        $output = '';
        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);
            $output .= $this->{'render' . $block[0]}($block);
            array_shift($this->context);
        }
        return $output;
    }

    /**
     * Consume lines for a paragraph
     *
     * Lines are collected until the first blank line or the end of input.
     *
     * @param $lines
     * @param $current
     * @return array the paragraph block and the index of the last consumed line
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until newline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) !== '') {
                $content[] = $lines[$i];
            } else {
                break;
            }
        }
        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points one past the last consumed line here
        return [$block, --$i];
    }

    /**
     * Render a paragraph block
     *
     * @param $block
     * @return string
     */
    protected function renderParagraph($block)
    {
        return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
    }


    // inline parsing


    /**
     * @var array the set of inline markers to use in different contexts.
     * Indexed by the first character of the marker, each entry maps the full
     * marker to its handler method, longest marker first.
     */
    private $_inlineMarkers = [];

    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * you may want to use [[renderText]] to deal with them.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse` that
     * also have an `@marker` annotation in PHPDoc.
     *
     * @return array a map of markers to parser methods
     */
    protected function inlineMarkers()
    {
        $markers = [];
        // detect "parse" functions
        $reflection = new \ReflectionClass($this);
        foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();
            if (strncmp($methodName, 'parse', 5) === 0) {
                preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
                foreach ($matches[1] as $match) {
                    $markers[$match] = $methodName;
                }
            }
        }
        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse
     *
     * Add all markers that are present in markdown.
     * Check is done to avoid iterations in parseInline(), good for huge markdown files
     * @param string $text
     */
    protected function prepareMarkers($text)
    {
        $this->_inlineMarkers = [];
        foreach ($this->inlineMarkers() as $marker => $method) {
            // PHP turns numeric markers such as "0" into integer array keys, restore the string
            $marker = (string) $marker;
            if ($marker === '' || strpos($text, $marker) === false) {
                continue;
            }
            $this->_inlineMarkers[$marker[0]][$marker] = $method;
        }

        // put the longest marker first, so that e.g. "**" is tried before "*".
        // uksort() keeps the keys untouched, even numeric ones.
        foreach ($this->_inlineMarkers as $char => $group) {
            if (count($group) > 1) {
                uksort($group, function ($a, $b) {
                    return strlen((string) $b) - strlen((string) $a);
                });
                $this->_inlineMarkers[$char] = $group;
            }
        }
    }

    /**
     * Parses inline elements of the language.
     *
     * The text is scanned for the first characters of the markers collected by
     * `prepareMarkers()`. Text between markers is added as `text` nodes, at a marker
     * position the handler method is called and is expected to return an array of the
     * resulting node and the number of bytes it consumed. A marker character no handler
     * matches is added as plain text.
     *
     * When the maximum nesting level is reached the text is returned as one text node.
     *
     * @param string $text the inline text to parse.
     * @return array
     */
    protected function parseInline($text)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', $text]];
        }
        $this->_depth++;

        $markers = implode('', array_keys($this->_inlineMarkers));

        $paragraph = [];

        // compare with '' explicitly, empty() would also reject the marker set "0"
        while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

            // strpbrk() returns the rest of $text starting at the first marker character
            $pos = strlen($text) - strlen($found);

            // add the text up to next marker to the paragraph
            if ($pos !== 0) {
                $paragraph[] = ['text', substr($text, 0, $pos)];
            }
            $text = $found;

            $parsed = false;
            foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                $marker = (string) $marker;
                if (strncmp($text, $marker, strlen($marker)) === 0) {
                    // parse the marker
                    array_unshift($this->context, $method);
                    list($output, $offset) = $this->$method($text);
                    array_shift($this->context);

                    $paragraph[] = $output;
                    // substr() returns false instead of '' on PHP < 8 when nothing is left
                    $text = (string) substr($text, $offset);
                    $parsed = true;
                    break;
                }
            }
            if (!$parsed) {
                // no handler matched, treat the marker character as plain text
                $paragraph[] = ['text', substr($text, 0, 1)];
                $text = (string) substr($text, 1);
            }
        }

        $paragraph[] = ['text', $text];

        $this->_depth--;

        return $paragraph;
    }

    /**
     * Parses escaped special characters.
     * @marker \
     * @param string $text the text starting at the backslash
     * @return array the text node and the number of bytes consumed
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            // output the escaped character without the backslash
            return [['text', $text[1]], 2];
        }
        // not an escape sequence, keep the backslash
        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections for example to highlight keywords or
     * do special escaping.
     *
     * @param array $block the text node, the text is stored in element `1`
     * @return string
     */
    protected function renderText($block)
    {
        return $block[1];
    }
}
|