inc/convert.php -
index
<?php
/* CONVERT.PHP - post conversion code */
// ALL THIS CODE IS GOING TO BE REPLACED AS THERE IS A BETTER WAY...
// IT WAS SUPPOSED TO BE REPLACED LAST VERSION... PERHAPS NEXT VERSION
// See also TRANSLATION.INI.
//
// The following functions are defined:
//
// convert() post conversions
// convert_user() comments
// Runs when this file is included: loads translate.ini into the global
// $translate table that the convert*() functions below read.
_config_convert();
/* convert - convert string according to TRANSLATE table */
// Converts a post body for formatted output. The string is exploded on
// THIS_EOL and handled one line at a time:
//   - an empty line closes the open paragraph (if any)
//   - a [skip] match (convertskip) is wrapped in its own <p>...</p>
//   - a [lines] match (convertlines) is emitted as-is, without P tags
//   - everything else goes through convertcodes/convertinlines/convertchars
//   - a line matching a [translate] "begin" RE starts a block that swallows
//     the following lines up to (not including) the "end" RE
//   - whatever is left is collected into <p> paragraphs joined with <br>
// Returns the converted text with trailing whitespace trimmed. A paragraph
// that is still open at the end of the input is NOT closed here (which has
// template implications).
//
// The lines are walked with an explicit index ($i always points at the next
// unread line) because each() no longer exists as of PHP 8.
function convert($data) {
    $lines = explode(THIS_EOL, $data);
    $count = count($lines);
    $i = 0;
    $p = 0;         // the one state flag: lines in the open paragraph
    $body = '';     // return value

    while ($i < $count) {
        $_ = $lines[$i++];

        if ($_ == '') {
            if ($p) {
                $body .= '</p>' . THIS_EOL;
                $p = 0;
            }
            continue;
        }

        if (convertskip($_)) {
            // this one is new and has not been tested for all
            // possible conditions and we may redefine it
            $body .= '<p>' . $_ . '</p>' . THIS_EOL;
            $p = 0;
            continue;
        }

        if (convertlines($_)) {
            $body .= $_ . THIS_EOL;     // no Ps
            $p = 0;
            continue;
        }

        $_ = convertcodes($_);
        if ($_ == "") continue;
        $_ = convertinlines($_);
        // chars are translated only for "no leading space" or "9 or more";
        // 1-8 leading spaces belong to the [:leadingspace] translation
        if (!preg_match('/^(\s+)/', $_, $res) || strlen($res[1]) >= 9)
            $_ = convertchars($_);

        $translate = _convertlookup($_);
        if ($translate) {
            $end = $translate['end'];
            $pre = $translate['pre'];
            $post = $translate['post'];
            $first = $translate['first'];
            $last = $translate['last'];
            $repl = $translate['replace'];
            $arg = null;    // reset per block so an earlier "func(arg)" cannot leak in

            // the "begin" line is part of the output only if [first] is set
            if ($first) {
                if (function_exists($repl))
                    $_ = $repl($_);
                $str = $pre . $_ . THIS_EOL;
            }
            else
                $str = $pre . THIS_EOL;

            // swallow lines until the "end" RE matches or the input runs out
            $terminated = false;
            while ($i < $count) {
                $_ = $lines[$i++];
                if (preg_match($end, $_)) {
                    $terminated = true;
                    break;
                }
                if (function_exists($repl))
                    $_ = $repl($_);
                elseif ($repl)
                    $_ = str_replace('$1', $_, $repl);
                $str .= $_ . THIS_EOL;
            }
            if (!$terminated)
                $_ = '';    // ran off the end: there is no terminating line

            if ($last == '0')
                $str = substr_replace($str, '', -1);
            // [post] may be "func(arg)": split it into name and argument
            if (preg_match('/(.*)\((.*)\)/', $post, $res)) {
                $post = $res[1];
                $arg = $res[2];
            }
            if ($last)
                $str .= $_;
            if (function_exists($post))
                $str = ($arg !== null) ? $post($str, $arg) : $post($str);
            else
                $str .= $post . THIS_EOL;
            $body .= $str;

            // unless [continue] is set, hand the terminating line back to the
            // main loop so it is converted like any other line (the old
            // each()/prev() code could not step back when the terminator was
            // the very last line, so that line was lost)
            if ($terminated && empty($translate['continue']))
                $i--;
            $p = 0;
            continue;
        }

        // lines are turned into paragraphs but we shouldn't do it for block
        // tags, and we assume that a line that starts with '<' and ends with
        // '>' is closed
        // BUG FIX NOTE this does not catch all block tags
        if (preg_match('/^<.*>$/', $_)) {
            if ($p) {
                $_ .= '</p>';
                $p = 0;     // the paragraph is closed now; do not <br> into it
            }
            $_ .= THIS_EOL;
        }
        else {
            $p++;
            $_ = ($p == 1 ? '<p>' : '<br>') . $_;
        }
        $body .= $_;
    }

    return rtrim($body);
}
/* convert_user - conversions on comment body */
// There are fewer translations for comments. Each non-empty line becomes its
// own <p>...</p>; depending on config('commentfilter') the line is either
// passed through our_strip_tags() (allowed tags from config('commenttags'))
// or escaped with htmlspecialchars(). When config('phpcomments') is on, a
// run of lines from "<?php" to "?>" is syntax highlighted instead.
// Returns the converted comment with trailing whitespace trimmed.
function convert_user($data) {
    $filter = config('commentfilter');
    $tags = config('commenttags');
    $atags = explode_tags($tags);       // array of tags

    // experimental: keep the raw comment so it can be appended as an HTML
    // comment for some analysis we are conducting
    $commentdata = config('logcommentdata') ? $data : null;

    // indexed walk instead of each(), which no longer exists as of PHP 8
    $lines = explode(THIS_EOL, $data);
    $count = count($lines);
    $body = '';

    for ($i = 0; $i < $count; $i++) {
        $_ = $lines[$i];
        if ($_ == '') continue;

        if (config('phpcomments') && preg_match('/^<\?php/', $_)) {
            // gather up to and including the closing "?>" line (or to the
            // end of the comment if it is never closed)
            $php = $_ . THIS_EOL;
            while (++$i < $count) {
                $php .= $lines[$i] . THIS_EOL;
                if (preg_match('/^\?\>/', $lines[$i]))
                    break;
            }
            $body .= highlight_string($php, TRUE);
            continue;
        }

        // we are probably going to add more things here
        if ($filter == 'striptags')
            $_ = our_strip_tags($_, $tags, $atags);
        else
            $_ = htmlspecialchars($_, ENT_NOQUOTES);
        $_ = convertchars($_);          // ' * ^ | _
        $body .= '<p>' . $_ . '</p>' . THIS_EOL;
    }

    if ($commentdata !== null) {
        // collapse every run of hyphens to one: a single str_replace pass
        // turned "--->" into "-->", which let a commenter close the HTML
        // comment early and inject markup
        $commentdata = preg_replace('/-{2,}/', '-', $commentdata);
        $body .= '<!-- ' . $commentdata . ' -->';
    }

    return rtrim($body);
}
// strip and fix un-closed tags
/*
this fixes un-closed tags per line, i.e. a comment like
Hi, <b>Jenny,
and <i>Tom</i></b>
will end up
Hi, <b>Jenny,</b>
and <i>Tom</i></b>
loose end tags don't seem to be a problem with most web browsers

$_          the line to clean
$tags       allowed tags in strip_tags() form, e.g. "<a><b>"
$tagsarray  the same tags as array("<a>"=>"</a>", ...) (see explode_tags)

Opening tags are counted case-insensitively and with or without attributes
(strip_tags() leaves attributes of allowed tags in place), so
<a href="..."> is closed too. Void elements such as <br> never get an end
tag appended.
*/
function our_strip_tags($_, $tags, $tagsarray) {
    static $void = array(
        'area' => 1, 'base' => 1, 'br' => 1, 'col' => 1, 'embed' => 1,
        'hr' => 1, 'img' => 1, 'input' => 1, 'link' => 1, 'meta' => 1,
        'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1,
    );

    $_ = preg_replace('/<\s+/', '<', $_);   // strip_tags() errs if ' '
    $_ = strip_tags($_, $tags);

    if (!$tags || !is_array($tagsarray))
        return $_;

    foreach ($tagsarray as $open => $close) {
        $name = strtolower(trim($open, "<>/ \t"));
        if ($name == '' || isset($void[$name]))
            continue;

        $q = preg_quote($name, '/');
        // "<b>" or "<b attr=...>", but not "<br>"
        $opened = (int) preg_match_all('/<' . $q . '(?:\s[^>]*)?>/i', $_, $m);
        $closed = (int) preg_match_all('/<\/' . $q . '\s*>/i', $_, $m);

        for (; $closed < $opened; $closed++)    // append if missing
            $_ .= $close;
    }

    return $_;
}
// "<a><b>" to array("<a>"=>"</a>","<b>"=>"</b>") (should be done just once but...)
function explode_tags($tags) {
    // Always returns an array (possibly empty), so callers can foreach over
    // the result even when $tags is empty or contains no complete tag.
    $atags = array();
    if (!$tags)
        return $atags;

    $parts = explode('>', $tags);
    array_pop($parts);                  // whatever follows the last '>'

    foreach ($parts as $t) {
        $t = trim($t);                  // tolerate "<a> <b>"
        if (substr($t, 0, 1) !== '<')
            continue;                   // not an opening tag; ignore
        $atags[$t . '>'] = preg_replace('/<(.*)/', '</$1>', $t);
    }

    return $atags;
}
/*
Start of all the "conversion" routines. Note that this code has been "spun off"
into its own API -- http://gajennings.net/gmlp/ -- and that code will
eventually be used instead.
*/
/* convertskip - skip over lines that match a [skip] RE */
// First step of the conversion. Returns TRUE as soon as one of the REs in
// $translate['skip'] matches the line, FALSE if none does (or if there is no
// [skip] table). When the matching RE has a first capture group, the line
// (passed by reference) is replaced by what that group captured.
function convertskip(&$_) {
    global $translate;

    $patterns = @$translate['skip'];
    if (!$patterns)
        return FALSE;

    foreach ($patterns as $re) {
        if (!preg_match($re, $_, $m))
            continue;
        if (isset($m[1]))
            $_ = $m[1];
        return TRUE;
    }

    return FALSE;
}
/* convertlines - convert lines according to [lines] */
// Second step of the conversion. $translate['lines'] maps an RE to an
// action; the first RE that matches the line decides what happens and the
// function returns TRUE (FALSE when nothing matches or there is no table):
//   'continue'            leave the line (passed by reference) untouched
//   name of a function    line = function(first capture group)
//   anything else         used as the preg_replace() replacement
function convertlines(&$_) {
    global $translate;

    $rules = @$translate['lines'];
    if (!$rules)
        return FALSE;

    foreach ($rules as $re => $func) {
        if (!preg_match($re, $_, $res))
            continue;
        if ($func == 'continue')
            return TRUE;
        $_ = function_exists($func)
            ? $func($res[1])
            : preg_replace($re, $func, $_);
        return TRUE;
    }

    return FALSE;
}
/* convertsub - calls other conversion functions */
// A way to reduce the overall length of function convert: runs
// convertcodes(), then (unless that left nothing) convertinlines(), and
// finally convertchars() -- the latter only when the line has no leading
// whitespace or at least 9 characters of it.
function convertsub($_) {
    $_ = convertcodes($_);

    if ($_ != "") {
        $_ = convertinlines($_);
        $indented = preg_match('/^(\s+)/', $_, $lead);
        $shortindent = $indented && strlen($lead[1]) < 9;
        if (!$shortindent)
            $_ = convertchars($_);      // _chars_
    }

    return $_;
}
// we absolutely do not like that test for leading spaces, but it's needed
// to resolve the conflict between the /^\s{9,}/ [pregs] setting and the
// [:leadingspace] translation (which is /^\s+/) -- we want a RE that goes like
// "none or more than 8" (could do two preg_matches, but that'd be too odd)
/* convertcodes - convert to the [codes] section of the translate.ini file */
// $translate['codes'] maps an RE to the name of a function. Every match of
// the RE in the line is replaced by function(first capture group), or by
// function(whole match) when the RE has no capture group.
//
// About errors: this table can be edited live, so a mistake in it must not
// spray warnings (or worse) over every post viewed by every user.
//   - "Compilation failed" warnings from a bad RE are hidden with the error
//     operator unless config('showpregerrors') is on; do your testing of new
//     REs with that setting on
//   - an entry that names a function that does not exist is skipped instead
//     of being called (which would be a fatal error); with showpregerrors on
//     a warning is raised for it
// The goal, if this code ever gets big, is an Admin edit process that tests
// the INI data before allowing any change to be made.
//
// Cost: one preg_match_all() per RE for every line of every post displayed,
// plus a function call and a substitution per match -- so the config flag is
// read once, outside the loop.
function convertcodes($_) {
    global $translate;

    if (!($codes = @$translate['codes']))
        return $_;

    $show = config('showpregerrors');

    foreach ($codes as $re => $func) {
        if (!function_exists($func)) {
            if ($show)
                trigger_error("convertcodes: unknown function '$func'", E_USER_WARNING);
            continue;
        }

        $found = $show
            ? preg_match_all($re, $_, $res)
            : @preg_match_all($re, $_, $res);
        if (!$found)
            continue;

        foreach ($res[0] as $k => $v) {
            $arg = isset($res[1][$k]) ? $res[1][$k] : $v;
            $_ = str_replace($v, $func($arg), $_);
        }
    }

    return $_;
}
/* convertinlines - convert according to [inlines] of translate.ini */
// Applies every RE => replacement pair of $translate['inlines'] to the line,
// in table order, and returns the result. An RE that fails (preg_replace()
// returns NULL, e.g. for a syntax error in the INI file) is silently
// ignored and the line is left as it was. Only NULL counts as failure: a
// replacement that legitimately produces '' or '0' is kept.
function convertinlines($_) {
    global $translate;

    if (!($inlines = @$translate['inlines']))
        return $_;

    foreach ($inlines as $preg => $replace) {
        $t = @preg_replace($preg, $replace, $_);
        if ($t !== null)
            $_ = $t;
    }

    return $_;
}
/* convertchars - character translations */
// $translate['chars'] maps a marker character to a tag name; text between
// a pair of markers gets wrapped in that tag by _xchars(). When
// config('entitytranslate') is on, convertentities() is applied first.
// Returns the line unchanged when there is no [chars] table.
function convertchars($_) {
    global $translate;

    if (!isset($translate['chars']))
        return $_;

    // do not xlate these: HTML comments and lines that start with a URL
    // (http or https)
    // need to reexamine this
    if (preg_match('/^<!--|-->$|^https?:\/\//', $_))
        return $_;

    if (config('entitytranslate'))
        $_ = convertentities($_);

    // the marker goes into an RE, so quote all of it
    foreach ($translate['chars'] as $k => $v)
        $_ = _xchars($_, preg_quote((string) $k, '/'), $v);

    return $_;
}
/* convertentities - convert according to [entities] of translate.ini */
// we want to be able to turn these off
// Applies every RE => replacement pair of $translate['entities'] to the
// line and returns the result.
function convertentities($_) {
    global $translate;

    if (!($entities = @$translate['entities']))
        return $_;

    // NOTE the anti-scream thing and use of $t is to silently avoid syntax
    // errors in the INI file -- this may be a good thing or a bad thing
    // depending on your point of view... Only a NULL result (an error) is
    // thrown away; a replacement that yields '' or '0' is a valid result.
    foreach ($entities as $preg => $replace) {
        $t = @preg_replace($preg, $replace, $_);
        if ($t !== null)
            $_ = $t;
    }

    return $_;
}
// I once was exceedingly annoyed using a blogapp that translated dozens of
// characters into HTML entities (now called "named characters" by W3C).
// And then we started doing it! UGH! Now it's off by default.
// ALL UPPER CASE = <p><b>All Upper Case</b></p> (note the Ps)
// Lower-cases the string, capitalizes the first letter of each word and
// wraps the result in a bold paragraph; meant to be named in a [code] entry.
function convertcase($s) {
    return '<p><b>' . ucwords(strtolower($s)) . '</b></p>';
}
function _xchars($str, $char, $tag) {
    // Wraps text found between two $char markers in <$tag>...</$tag>.
    // $char arrives already escaped for use inside the RE.
    // I've never had so much trouble with a spot of code as I have with
    // this one. I am always finding ways to break it!
    // The opening marker must not follow a letter, '=' or ';' and the
    // closing marker must not be followed by a letter -- and do not forget
    // about: " the *word*; more text..." grrrr!
    $pattern = sprintf('/([^a-zA-Z=;])%s(.*)%s([^a-zA-Z])/U', $char, $char);
    $open = '<' . $tag . '>';
    $close = '</' . $tag . '>';
    $replacement = '$1' . $open . '$2' . $close . '$3';

    // pad with a space on each side so a marker at the very start or end of
    // the line has a neighbour to match against, then take the padding off
    $padded = preg_replace($pattern, $replacement, ' ' . $str . ' ');
    $padded = substr_replace($padded, '', -1);
    return substr_replace($padded, '', 0, 1);
}
// getting the xchars RE was tricky as we need to handle various conditions:
// beginning of sentence: "*Bold* word."
// trailing punctuation: "Italic ^word^."
// preserve inside chars: "'$identifier_with_underscore'"
// etc.
// but we also want to be as simple as possible!
// a side effect of the function is that three in a row (''') result in:
// <tag></tag>char
/* load the translation table */
// Reads translate.ini (via configfile()) into the global $translate. The
// [translate] section holds the block translations; its "defaults" entry is
// removed from the table and its values are copied into every other entry
// that does not set them itself. A missing [translate] section becomes an
// empty table so that later lookups have something to loop over.
function _config_convert() {
    global $translate;

    $translate = configfile('translate.ini');
    if (!$translate) return;

    if (!isset($translate['translate']) || !is_array($translate['translate'])) {
        $translate['translate'] = array();
        return;
    }

    $defaults = array();
    if (isset($translate['translate']['defaults'])) {
        if (is_array($translate['translate']['defaults']))
            $defaults = $translate['translate']['defaults'];
        unset($translate['translate']['defaults']);
    }

    // this just puts defaults in if any missing:
    foreach (array_keys($translate['translate']) as $trans) {
        if (!is_array($translate['translate'][$trans]))
            continue;
        foreach ($defaults as $name => $value)
            if (!isset($translate['translate'][$trans][$name]))
                $translate['translate'][$trans][$name] = $value;
    }
}
/* index into translation table and return it if string match */
// Returns the first entry of $translate['translate'] whose 'begin' RE
// matches the line, or NULL when none matches or no table is loaded.
function _convertlookup($_) {
    global $translate;

    if ($translate) {
        foreach ($translate['translate'] as $entry) {
            if (preg_match($entry['begin'], $_))
                return $entry;
        }
    }

    return NULL;
}
/*
Nobody will ever let you know,
when you ask the reasons why.
They'll just tell you
that your on your own.
Fill your head
all full of lies...
-- Sabbath Bloody Sabbath
*/
?>