* @license http://www.opensource.org/licenses/mit-license.php The MIT License * @link http://d.hatena.ne.jp/anatoo/ * $Rev$ */ class HatenaSyntax { protected $firstCharSyntaxes = array(); protected $inlineSyntaxes = array(); protected $footnoteSyntax; protected $markupSyntaxes = array(); protected $openingTags = array(); protected $closingTags = array(); protected $options; public function __construct($options = array()) { if(!is_array($options)) throw new Exception('argument is not array.'); $this->options = $options; // 汎用文法の追加 // 汎用といってもDefaultとBlockquoteはちょっと特殊です // 理由はgetFirstCharSyntaxIdentifierを参照 $this->addFirstCharSyntax(new HatenaSyntax_Head($this->getOption('headlevel', 3))) ->addFirstCharSyntax(new HatenaSyntax_Table()) ->addFirstCharSyntax(new HatenaSyntax_DefinitionList()) ->addFirstCharSyntax(new HatenaSyntax_Default()) ->addFirstCharSyntax(new HatenaSyntax_Blockquote(false)) ->addFirstCharSyntax(new HatenaSyntax_Blockquote(true)) ->addFirstCharSyntax(new HatenaSyntax_List()) ->addFirstCharSyntax(new HatenaSyntax_List(true)); $this->addMarkupSyntax(new HatenaSyntax_Pre(false, $this->getOption('htmlescape', false))) ->addMarkupSyntax(new HatenaSyntax_Pre(true)); $this->addInlineSyntax(new HatenaSyntax_Link($this->getOption('htmlescape', false))); // 非汎用文法 // applyInlineSyntaxでついでにこの文法も適用される。 $this->footnoteSyntax = new HatenaSyntax_Footnote($this->getOption('id', '')); } protected function getOption($key, $default = false) { if(!isset($this->options[$key])) return $default; return $this->options[$key]; } public function addMarkupSyntax(HatenaSyntax_MarkupSyntaxInterface $syntax) { $this->markupSyntaxes[$syntax->getOpeningIdentifier()] = $syntax; $this->openingTags[$syntax->getOpeningIdentifier()] = true; $this->closingTags[$syntax->getClosingIdentifier()] = true; return $this; } public function addFirstCharSyntax(HatenaSyntax_FirstCharSyntaxInterface $syntax) { $this->firstCharSyntaxes[$syntax->getIdentifier()] = $syntax; return $this; } public function addInlineSyntax(HatenaSyntax_InlineSyntaxInterface $syntax) { $this->inlineSyntaxes[] = $syntax; return $this; } public function parseStructure($contents) { $openingTags = $this->openingTags; $closingTags = $this->closingTags; $markupSyntaxes = $this->markupSyntaxes; $firstCharSyntaxes = $this->firstCharSyntaxes; $lines = $this->getLines($contents); $result = array(); for($i = 0, $length = count($lines); $i < $length; $i++) { $block = array('lines' => array()); // ブロック文法の種類の確認 // MarkupSyntaxだったら if(isset($openingTags[$lines[$i]])) { $block['syntax'] = $markupSyntaxes[$lines[$i]]; $block['type'] = 'markup'; for($i++; $i < $length; $i++) { if(isset($closingTags[$lines[$i]])) break; $block['lines'][] = $lines[$i]; } } // FirstCharSyntaxだったら else { $firstChar = $this->getFirstCharSyntaxIdentifier($lines[$i]); $block['syntax'] = $firstCharSyntaxes[$firstChar]; $block['type'] = 'firstchar'; for(; $i < $length; $i++) { if($firstChar !== $this->getFirstCharSyntaxIdentifier($lines[$i])) break; $block['lines'][] = $lines[$i]; } $i--; } $result[] = $block; } return $result; } public function parse($contents) { $structure = $this->parseStructure($contents); $result = array(); $htmlEscape = $this->getOption('htmlescape'); $result[] = '
' . PHP_EOL; foreach($structure as $block) { if($block['type'] === 'markup') foreach($block['lines'] as $line) $block['syntax']->parse($line); else foreach($block['lines'] as $line) { if($htmlEscape) $line = htmlspecialchars($line, ENT_QUOTES); $line = $this->applyInlineSyntax($line); $block['syntax']->parse($line); } $result[] = $block['syntax']->getResult(); } $result[] = '
' . PHP_EOL . PHP_EOL; $result[] = $this->getFootnoteSyntaxResult(); return implode('', $result); } public function getLines($contents) { return split("\r\n|\n|\r", $contents); } public function getFootnoteSyntaxResult() { return $this->footnoteSyntax->getResult(); } public function applyInlineSyntax($line) { foreach($this->inlineSyntaxes as $syntax) { $line = $syntax->parse($line); } $line = $this->footnoteSyntax->parse($line); return $line; } protected function getFirstCharSyntaxIdentifier($line) { if($line === '<<') return '<<'; elseif($line === '>>') return '>>'; $char = substr($line, 0, 1); if(isset($this->firstCharSyntaxes[$char])) return $char; if(isset($this->openingTags[ $line])) return false; return 'default'; } } /** * リストや見出し、表等のはてな記法を表現するためのインターフェイス */ interface HatenaSyntax_FirstCharSyntaxInterface { public function getIdentifier(); public function parse($line); public function getResult(); } /** * リンクなどのはてな記法を表現するためのインターフェイス */ interface HatenaSyntax_InlineSyntaxInterface { public function parse($line); } /** * pre記法等のためのインターフェイス */ interface HatenaSyntax_MarkupSyntaxInterface { public function getOpeningIdentifier(); public function getClosingIdentifier(); public function parse($line); public function getResult(); } class HatenaSyntax_Table implements HatenaSyntax_FirstCharSyntaxInterface { protected $table = array(); public function getIdentifier() { return '|'; } public function parse($line) { $_ = explode('|', $line); array_pop($_); array_shift($_); $this->table[] = $_; } public function getResult() { $table = $this->table; $this->table = array(); $result = '' . PHP_EOL; foreach($table as $col) { $result .= ''; foreach($col as $cell) { if(substr($cell, 0, 1) === '*') { $result .= ''; } else { $result .= ''; } } $result .= '' . PHP_EOL; } $result .= '
' . substr($cell, 1) . '' . $cell . '
' . PHP_EOL; return $result; } } class HatenaSyntax_Pre implements HatenaSyntax_MarkupSyntaxInterface { protected $result = ''; protected $superPreFlag; protected $htmlEscape; public function __construct($superPreFlag = false, $htmlEscape = false) { $this->superPreFlag = !!$superPreFlag; $this->htmlEscape = !!$htmlEscape; } public function getOpeningIdentifier() { return $this->superPreFlag ? '>||' : '>|'; } public function getClosingIdentifier() { return $this->superPreFlag ? '||<' : '|<'; } public function parse($line) { $this->result .= $line .PHP_EOL; } public function getResult() { $result = $this->superPreFlag || $this->htmlEscape ? htmlspecialchars($this->result, ENT_QUOTES) : $this->result; $this->result = ''; return '
' . PHP_EOL . $result . '
' . PHP_EOL; } } class HatenaSyntax_Footnote { protected $footnotes = array(); protected $id; public function __construct($id = '') { $this->id = empty($id) ? '': '_' . $id; } public function parse($line) { while(true) { if(($result = $this->getFootnote($line)) === false) break; $this->footnotes[] = $result[1]; $num = count($this->footnotes); $id = $this->id; $line = $result[0] . '(*' . $num . ')' . $result[2] . PHP_EOL; } return $line; } public function getFootnote($line) { if(($_ = $this->seekFootnote($line)) === false) return false; $left = substr($line, 0, $_[0]); $contents = substr($line, $_[0] + 2, $_[1] - $_[0] - 2); $right = substr($line, $_[1] + 2); return array($left, $contents, $right); } public function seekFootnote($line) { $opening = strpos($line, '(('); if($opening === false) return false; $closing = strpos($line, '))', $opening); if($closing === false) return false; return array($opening, $closing); } public function getResult() { $footnotes = $this->footnotes; $this->footnotes = array(); $result = '
' . PHP_EOL; $id = $this->id; foreach($footnotes as $num => $note) $result .= '

*' . $num . ': ' . $note . '

' . PHP_EOL; $result .= '
' . PHP_EOL; return $result; } } class HatenaSyntax_Link implements HatenaSyntax_InlineSyntaxInterface { protected $escaping; public function __construct($flag = false) { $this->escaping = !!$flag; } public function parse($line) { return preg_replace_callback('/\[(https?:\/\/[^:]+)(:title=([^\]]*))?\]/', array($this, 'process'), $line); } public function process($matches) { // 間違ってるように見えるが間違っていない $url = $this->escaping ? $matches[1] : htmlspecialchars($matches[1], ENT_QUOTES); $cont = isset($matches[3]) ? ($this->escaping ? $matches[3] : htmlspecialchars($matches[3], ENT_QUOTES)) : $url; return "{$cont}"; } } class HatenaSyntax_List implements HatenaSyntax_FirstCharSyntaxInterface { protected $list = array(); protected $identifier; public function __construct($orderedFlag = false) { $this->identifier = $orderedFlag ? '+' : '-'; } public function getIdentifier() { return $this->identifier; } public function parse($line) { $level = $this->countLevel($line); $list =& $this->list; for($i = 0; $i < $level; $i++) { if(count($list) > 0) { if(!is_array($list[count($list) - 1])) $list[] = array(); $list =& $list[count($list) - 1]; } else { $list =& $list[]; } } $list[] = substr($line, $level); } /** * $lineの先頭の+,-の深さを数える */ public function countLevel($line) { $level = 0; $len = strlen($line); for($i = 0; $i < $len; $i++) { if($line[$i] === '-' || $line[$i] === '+') $level++; else break; } return $level - 1; } public function getResult() { $result = $this->buildList($this->list); $this->list = array(); return $result; } /** * 配列からリストを表現するタグを生成 */ public function buildList($arr) { if(count($arr) > 0 && !is_array($arr[0])) $tagName = (substr($arr[0], 0, 1) === '+') ? 'ol': 'ul'; else $tagName = $this->identifier === '+' ? 'ol': 'ul'; $result = '<' . $tagName . '>' . PHP_EOL; foreach($arr as $item) { if(is_array($item)) $result .= $this->buildList($item); else { $item = substr($item, 1); $result .= '
  • ' . $item . '
  • ' . PHP_EOL; } } $result .= '' . PHP_EOL; return $result; } } class HatenaSyntax_Head implements HatenaSyntax_FirstCharSyntaxInterface { protected $result = ''; protected $baseLevel; public function __construct($baseLevel = 3) { $baseLevel = (int)$baseLevel; if($baseLevel <= 0) throw new Exception("invalid argument: {$baseLevel}"); $this->baseLevel = $baseLevel; } public function getIdentifier() { return '*'; } public function parse($line) { $base = $this->baseLevel; $level = $this->countLevel($line); $this->result .= '' . substr($line, $level) . '' . PHP_EOL; } /** * $lineの先頭の*の数を数える */ public function countLevel($line) { $level = 0; $len = strlen($line); for($i = 0; $i < $len || $i < 3; $i++) { if($line[$i] === '*') $level++; else break; } return $level; } public function getResult() { $result = $this->result; $this->result = ''; return $result; } } class HatenaSyntax_Blockquote implements HatenaSyntax_FirstCharSyntaxInterface { protected $opening; public function __construct($opening) { $this->opening = !!$opening; } public function getIdentifier() { return $this->opening ? '>>': '<<'; } public function parse($line) {} public function getResult() { return ($this->opening ? '
    ': '
    ') . PHP_EOL; } } class HatenaSyntax_DefinitionList implements HatenaSyntax_FirstCharSyntaxInterface { protected $items; public function getIdentifier() { return ':'; } public function parse($line) { $this->items[] = explode(':', substr($line, 1), 2); } public function getResult() { $result = '
    ' . PHP_EOL; foreach($this->items as $item) { $result .= $item[0] === '' ? '' :'
    ' . $item[0] . '
    ' . PHP_EOL; if(isset($item[1])) $result .= '
    ' . $item[1] . '
    ' . PHP_EOL; } $this->items = array(); $result .= '
    ' . PHP_EOL; return $result; } } class HatenaSyntax_Default implements HatenaSyntax_FirstCharSyntaxInterface { protected $result = ''; protected $newLineFlag = false; public function getIdentifier() { return 'default'; } public function parse($line) { if($line === '') { /** * 何もない行が二つ続いたら改行(
    )を挿入 */ if($this->newLineFlag) { $this->result .= '
    ' . PHP_EOL; $this->newLineFlag = false; } else { $this->newLineFlag = true; } $this->result .= PHP_EOL; } else { $this->result .= '

    ' . $line . '

    ' . PHP_EOL; $this->newLineFlag = false; } } public function getResult() { $result = $this->result; $this->result = ''; $this->newLineFlag = false; return $result; } }