【问题标题】:SimplePie regex errors(SimplePie 正则表达式错误)
【发布时间】:2012-03-22 17:56:07
【问题描述】:

1.3 开发版 SimplePie 在 xampp 上使用 PHP 5.3。

我能够获取 RSS 提要并显示它,但每次获取的项目都会出现以下错误:

警告:preg_match() [function.preg-match]:编译失败:正则表达式偏移 562 处没有可重复的内容(nothing to repeat),位于 C:\xampp\htdocs\simplepie.php 第 5877 行

警告:preg_match() [function.preg-match]:编译失败:正则表达式偏移 509 处没有可重复的内容(nothing to repeat),位于 C:\xampp\htdocs\simplepie.php 第 5965 行

警告:preg_match() [function.preg-match]:编译失败:正则表达式偏移 509 处没有可重复的内容(nothing to repeat),位于 C:\xampp\htdocs\simplepie.php 第 6031 行

发生错误的函数:

    /**
 * Parse RFC2822's date format
 *
 * @access protected
 * @return int Timestamp
 */
public function date_rfc2822($date)
{
    static $pcre;
    if (!$pcre)
    {
        $wsp = '[\x09\x20]';
        $fws = '(?:' . $wsp . '+|' . $wsp . '*(?:\x0D\x0A' . $wsp . '+)+)';
        $optional_fws = $fws . '?';
        $day_name = $this->day_pcre;
        $month = $this->month_pcre;
        $day = '([0-9]{1,2})';
        $hour = $minute = $second = '([0-9]{2})';
        $year = '([0-9]{2,4})';
        $num_zone = '([+\-])([0-9]{2})([0-9]{2})';
        $character_zone = '([A-Z]{1,5})';
        $zone = '(?:' . $num_zone . '|' . $character_zone . ')';
        $pcre = '/(?:' . $optional_fws . $day_name . $optional_fws . ',)?' . $optional_fws . $day . $fws . $month . $fws . $year . $fws . $hour . $optional_fws . ':' . $optional_fws . $minute . '(?:' . $optional_fws . ':' . $optional_fws . $second . ')?' . $fws . $zone . '/i';
    }
    if (preg_match($pcre, $this->remove_rfc2822_comments($date), $match))

/**
 * Parse RFC850's date format
 *
 * @access protected
 * @return int Timestamp
 */
public function date_rfc850($date)
{
    static $pcre;
    if (!$pcre)
    {
        $space = '[\x09\x20]+';
        $day_name = $this->day_pcre;
        $month = $this->month_pcre;
        $day = '([0-9]{1,2})';
        $year = $hour = $minute = $second = '([0-9]{2})';
        $zone = '([A-Z]{1,5})';
        $pcre = '/^' . $day_name . ',' . $space . $day . '-' . $month . '-' . $year . $space . $hour . ':' . $minute . ':' . $second . $space . $zone . '$/i';
    }
    if (preg_match($pcre, $date, $match))

/**
 * Parse C99's asctime()'s date format
 *
 * @access protected
 * @return int Timestamp
 */
public function date_asctime($date)
{
    static $pcre;
    if (!$pcre)
    {
        $space = '[\x09\x20]+';
        $wday_name = $this->day_pcre;
        $mon_name = $this->month_pcre;
        $day = '([0-9]{1,2})';
        $hour = $sec = $min = '([0-9]{2})';
        $year = '([0-9]{4})';
        $terminator = '\x0A?\x00?';
        $pcre = '/^' . $wday_name . $space . $mon_name . $space . $day . $space . $hour . ':' . $min . ':' . $sec . $space . $year . $terminator . '$/i';
    }
    if (preg_match($pcre, $date, $match))

错误所指向的行是每个函数中最后一个 if 语句(完整代码可在此处查看)。

我认为每个函数的每个 $pcre 中都存在错误的正则表达式。

谢谢

【问题讨论】:

    标签: php regex simplepie


    【解决方案1】:

    如果正则表达式本身有任何问题,它根本无法通过编译。
    但是,$this->day_pcre 和 $this->month_pcre 可能包含会破坏正则表达式的元字符。最好检查一下。

    我替换了“Mon”和“Oct”并在 Ideone 上运行。似乎工作。

    作为旁注,您可能想要把 $fws 的写法

    从 $fws = '(?:' . $wsp . '+|' . $wsp . '*(?:\x0D\x0A' . $wsp . '+)+)'
    改为 $fws = '(?:(?:(?:\x0D\x0A)?' . $wsp . ')+)'

    因为两者是等效的,而且后者可能更高效。

    在您的函数中,您应该打印出
    $day/$month/$pcre 变量的正则表达式。你还能指望如何调试它?

    可能是别的,我不知道。

    这是我得到的:http://ideone.com/zJ5vE

    代码

    <?php
    
    // Feed three asctime()-style samples (each ending in a line feed) to the demo matcher.
    $asctime_samples = array(
        "Mon Oct 21 11:21:31 2012\x0A",
        "Mon Oct 22 12:22:32 2012\x0A",
        "Mon Oct 23 13:23:33 2012\x0A",
    );
    foreach ($asctime_samples as $asctime_sample)
    {
        date_asctime($asctime_sample);
    }

    // Visual separator between the two groups of results.
    print("==================\n");

    // Feed three RFC 2822-style samples (no seconds, numeric zone) to the demo matcher.
    $rfc2822_samples = array(
        'Mon, 21 Oct 2012 21:01 -1011',
        'Mon, 22 Oct 2012 22:02 -1012',
        'Mon, 23 Oct 2012 23:03 -1013',
    );
    foreach ($rfc2822_samples as $rfc2822_sample)
    {
        date_rfc2822($rfc2822_sample);
    }
    
    
    /**
     * Stand-alone demo of the C99 asctime() date matcher.
     *
     * The weekday and month names are fixed to "Mon" and "Oct" in place of
     * $this->day_pcre / $this->month_pcre. When $date matches, the captured
     * groups (day, hour, minute, second, year) are dumped with print_r();
     * otherwise nothing is printed. Nothing is returned.
     *
     * @param string $date Date string to test
     * @return void
     */
    function date_asctime($date)
    {
        // The pattern is assembled once and reused on later calls.
        static $pattern = null;
        if ($pattern === null)
        {
            $gap = '[\x09\x20]+';          // one or more tabs/spaces between fields
            $two_digits = '([0-9]{2})';

            // hour:minute:second, each captured separately
            $clock = implode(':', array($two_digits, $two_digits, $two_digits));

            $fields = array(
                'Mon',            // stands in for $this->day_pcre
                'Oct',            // stands in for $this->month_pcre
                '([0-9]{1,2})',   // day of month
                $clock,
                '([0-9]{4})',     // four-digit year
            );

            // An optional trailing line feed and/or NUL byte is tolerated before the end.
            $pattern = '/^' . implode($gap, $fields) . '\x0A?\x00?' . '$/i';
        }

        if (!preg_match($pattern, $date, $groups))
        {
            return;
        }
        print_r($groups);
    }
    
    
    /**
     * Stand-alone demo of the RFC 2822 date matcher.
     *
     * The day and month names are fixed to "Mon" and "Oct" in place of
     * $this->day_pcre / $this->month_pcre, and $date is matched as given
     * rather than being passed through $this->remove_rfc2822_comments() first.
     * When $date matches, the captured groups are dumped with print_r();
     * otherwise nothing is printed. Nothing is returned.
     *
     * @param string $date Date string to test
     * @return void
     */
    function date_rfc2822($date)
    {
        // The pattern is assembled once and reused on later calls.
        static $pattern = null;
        if ($pattern === null)
        {
            $blank = '[\x09\x20]';

            // Folding whitespace: one or more blanks, each optionally preceded by CRLF.
            // Used here instead of the longer form
            // '(?:' . $wsp . '+|' . $wsp . '*(?:\x0D\x0A' . $wsp . '+)+)'.
            $fold = '(?:(?:(?:\x0D\x0A)?' . $blank . ')+)';
            $maybe_fold = $fold . '?';

            $two_digits = '([0-9]{2})';
            $colon = $maybe_fold . ':' . $maybe_fold;

            // Optional "Mon," prefix ('Mon' stands in for $this->day_pcre).
            $weekday_prefix = '(?:' . $maybe_fold . 'Mon' . $maybe_fold . ',)?';

            // day, month, year ('Oct' stands in for $this->month_pcre).
            $date_part = implode($fold, array('([0-9]{1,2})', 'Oct', '([0-9]{2,4})'));

            // hour:minute with an optional :second.
            $time_part = $two_digits . $colon . $two_digits . '(?:' . $colon . $two_digits . ')?';

            // Either a numeric zone (sign, two digits, two digits) or a 1-5 letter zone name.
            $zone_part = '(?:' . '([+\-])([0-9]{2})([0-9]{2})' . '|' . '([A-Z]{1,5})' . ')';

            $pattern = '/' . $weekday_prefix . $maybe_fold . $date_part . $fold . $time_part . $fold . $zone_part . '/i';
        }

        if (!preg_match($pattern, $date, $groups))
        {
            return;
        }
        print_r($groups);
    }
    ?>
    

    输出

    Array
    (
        [0] => Mon Oct 21 11:21:31 2012
    
        [1] => 21
        [2] => 11
        [3] => 21
        [4] => 31
        [5] => 2012
    )
    Array
    (
        [0] => Mon Oct 22 12:22:32 2012
    
        [1] => 22
        [2] => 12
        [3] => 22
        [4] => 32
        [5] => 2012
    )
    Array
    (
        [0] => Mon Oct 23 13:23:33 2012
    
        [1] => 23
        [2] => 13
        [3] => 23
        [4] => 33
        [5] => 2012
    )
    ==================
    Array
    (
        [0] => Mon, 21 Oct 2012 21:01 -1011
        [1] => 21
        [2] => 2012
        [3] => 21
        [4] => 01
        [5] => 
        [6] => -
        [7] => 10
        [8] => 11
    )
    Array
    (
        [0] => Mon, 22 Oct 2012 22:02 -1012
        [1] => 22
        [2] => 2012
        [3] => 22
        [4] => 02
        [5] => 
        [6] => -
        [7] => 10
        [8] => 12
    )
    Array
    (
        [0] => Mon, 23 Oct 2012 23:03 -1013
        [1] => 23
        [2] => 2012
        [3] => 23
        [4] => 03
        [5] => 
        [6] => -
        [7] => 10
        [8] => 13
    )
    

    【讨论】:

    • 谢谢。 $this->day_pcre 和 $this->month_pcre 变量包含不同语言中每个可能的日期和月份的模式,我删除了除英语之外的所有语言,并且错误消失了。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2016-11-19
    • 1970-01-01
    • 2013-11-26
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多