Welcome Guest, Not a member yet? Register   Sign In
Email addresses with a "+" are not parsed correctly
#13

[eluser]sophistry[/eluser]
Found a few more holes and plugged them.

I also wrote a description of the regex in comments in the code below, since it's getting pretty hairy!

Finally, I wrapped some test code around it and put it head to head with the old regex auto_link email detector so you can see for yourself how lax the old one was.

Code:
<?php

/**
 * Throw-away comparison controller: runs a stricter e-mail detection regex
 * and the original auto_link e-mail regex over the same list of sample
 * strings and prints one HTML table of results per regex.
 */
class Test extends Controller {

    /**
     * PHP4-style constructor; only delegates to the parent Controller constructor.
     */
    function Test()
    {
        parent::Controller();
    }

    /**
     * Matches every sample string against the NEW and the OLD pattern and
     * echoes two tables (String In / Full pattern / mailbox / domain).
     *
     * Cells are empty (null) for strings in which a pattern finds no match.
     */
    function index()
    {
        // test on "real" addresses
        // NOTE(review): many entries below read "[email protected]", which looks
        // like an e-mail obfuscation placeholder left by the page this code was
        // copied from - restore the intended sample addresses before relying on
        // the output.
        $strs = array(
                    "back|to=school~w0w.does+this^[email protected]",
                    "back{to}school-does+this^[email protected]",
                    'back{to}school#[email protected]',
                    '[email protected]',
                    '[email protected].',
                    '[email protected]..',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    'h#[email protected]',
                    '[email protected]',
                    'h$[email protected]',
                    'h%[email protected]',
                    'h&[email protected]',
                    'h*[email protected]',
                    '[email protected]',
                    'h/[email protected]',
                    "h'[email protected]",
                    '[email protected]',
                    '[email protected]',
                    'h^[email protected]',
                    '[email protected]',
                    'h`[email protected]',
                    'h{[email protected]',
                    'h}[email protected]',
                    'h|[email protected]',
                    '[email protected]',
                    'h.r@[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    '[email protected]',
                    );

        // characters allowed in the mailbox part, excluding the dot;
        // the leading "-" is literal because it is first in the character class
        $chars_not_dot = '-a-z0-9#!$%&*+/\'=?^_`{}|~';

        // The patterns do not depend on the string being tested, so they are
        // built once here instead of once per loop iteration.
        //
        // NEW pattern, delimited by ";" with the i switch (caseless).
        // (?:pattern) denotes a non-capturing sub-pattern, so only two groups
        // end up in the matches array: 1 = mailbox, 2 = domain.
        //
        // mailbox:
        //   - starts with one char that is not a dot,
        //   - followed by zero or more chars that are either not dots or a
        //     single dot (no double dots - negative lookahead assertion),
        //   - followed by an @ sign that is not preceded by a dot
        //     (negative lookbehind assertion).
        // domain:
        //   - starts with an alnum, followed by zero or more alnums or dashes,
        //     followed by one more alnum, then a dot (again, no double dots);
        //     this restricts the first label to a minimum of two alnums
        //     (standard 20080830 which may be changing in future ICANN spec),
        //   - after that dot: one alnum, followed by zero or more alnums,
        //     dashes or single dots (no double dots),
        //   - and the domain must close with an alnum (no dot, no dash).
        $new_pattern = ";([$chars_not_dot](?:[$chars_not_dot]|[.](?![.]))*)(?<![.])@((?:[[:alnum:]])(?:[-[:alnum:]]*)(?:[[:alnum:]])(?:[.](?![.]))(?:[[:alnum:]])(?:[-[:alnum:]]|[.](?![.]))*(?:[[:alnum:]]));i";

        // OLD pattern: the original auto_link email matcher.
        // Its group 2 is only the label before the first dot; the rest of the
        // domain is captured in group 3, which is not shown in the table.
        $old_pattern = "/([a-zA-Z0-9_\.\-]+)@([a-zA-Z0-9\-]+)\.([a-zA-Z0-9\-\.]*)/i";

        $email_detector_regex_NEW_array = $this->_collect_rows($new_pattern, $strs);
        $email_detector_regex_OLD_array = $this->_collect_rows($old_pattern, $strs);

        $this->load->library('table');
        $this->table->set_heading('String In', 'Full pattern', 'mailbox', 'domain');
        echo '<hr>NEW regex';
        echo $this->table->generate($email_detector_regex_NEW_array);
        echo '<hr>OLD regex';
        // reset the table before building the second one
        $this->table->clear();
        $this->table->set_heading('String In', 'Full pattern', 'mailbox', 'domain');
        echo $this->table->generate($email_detector_regex_OLD_array);

    }

    /**
     * Builds one table row per input string for the given pattern.
     *
     * @param  string $pattern complete PCRE pattern including delimiters and switches
     * @param  array  $strs    strings to search
     * @return array  list of array(input string, first full match, first group 1, first group 2)
     */
    function _collect_rows($pattern, $strs)
    {
        $rows = array();
        foreach ($strs as $str)
        {
            preg_match_all($pattern, $str, $matches);
            $rows[] = array(
                $str,
                $this->_first_hit($matches, 0),
                $this->_first_hit($matches, 1),
                $this->_first_hit($matches, 2),
            );
        }
        return $rows;
    }

    /**
     * Returns the first match recorded for a capture group, or null when the
     * group has no match. Replaces "@" error suppression on the array access.
     *
     * @param  mixed $matches matches array as filled in by preg_match_all()
     * @param  int   $group   capture group index (0 = full pattern)
     * @return mixed first matched string, or null
     */
    function _first_hit($matches, $group)
    {
        return isset($matches[$group][0]) ? $matches[$group][0] : null;
    }

    /**
     * Debug helper: echoes print_r() output of $d wrapped in <pre> tags.
     */
    function _p($d) {echo'<pre>';print_r($d);echo'</pre>';}
}
/* End of file test.php */


Messages In This Thread
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 07:59 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 09:43 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 09:50 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 09:54 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 10:17 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 10:49 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 11:11 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-28-2008, 11:38 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-29-2008, 09:58 AM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-29-2008, 10:29 AM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-29-2008, 12:07 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-29-2008, 12:26 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 08-30-2008, 04:20 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 09-01-2008, 12:02 AM
Email addresses with a "+" are not parsed correctly - by El Forum - 09-10-2008, 10:25 PM
Email addresses with a "+" are not parsed correctly - by El Forum - 09-11-2008, 06:45 AM
Email addresses with a "+" are not parsed correctly - by El Forum - 09-11-2008, 07:47 AM



Theme © iAndrew 2016 - Forum software by © MyBB