FindProxyForURL - Parse a PAC (proxy auto config) file in PHP

A PAC file is a JavaScript file that web browsers evaluate to determine which proxy server to use for a given URL.

Here's a working example of using the php_spidermonkey extension to run a proxy auto config script (javascript) in PHP.

All you need is a PAC file to test it.  You can write your own by following one of the many guides on the web; here is one: http://www.ee.ed.ac.uk/%7Emfg/work/proxy-live.html

There is more information and Windows builds of php_spidermonkey in this article: Javascript in PHP on Windows with php_spidermonkey.  Install the extension dll in PHP in the usual way.

Here is a snippet of code that uses the class 'CProxy' (defined underneath) which executes the Javascript PAC function FindProxyForURL:


Code formatted by http://hilite.me/
// Usage example: load a PAC script and ask it which proxy to use for a URL.
use DEMO\PAC\CProxy; // namespace, if required.
 
// No PAC file is passed to the constructor, so it is loaded explicitly below.
$proxy = new CProxy();
$proxy->LoadPAC('C:\proxy.pac'); // pac file is usually a URL
// Second argument true: return the raw string produced by FindProxyForURL
// instead of the array of parsed proxy entries.
$test = $proxy->FindProxyForAddress('http://www.google.com/', true);
echo $test;

/*
Assuming your PAC file contained something like this:

function FindProxyForURL(url, host)
{
if ( shExpMatch(host, "*.google.*") )
{
return "PROXY 10.20.30.40:8000; DIRECT";
}
return "DIRECT";
}

this snippet would print:
PROXY 10.20.30.40:8000; DIRECT
*/

Definition of class CProxy - it utilises and demonstrates some of the following:
  1. PHP 5.4 - uses namespaces to show how they work in this scenario, and traits.
  2. php_spidermonkey javascript interpreter
  3. PAC (proxy-auto-config) functions 

<?php
/* This example uses a namespace, just to show how it works inside namespaces.
Namespace can be removed. */

namespace DEMO\PAC;
use \JSContext, \Exception;

// Stop processing this file when the JSVERSION_DEFAULT constant is missing;
// that constant is used here as the marker that php_spidermonkey is loaded.
if ( !defined('JSVERSION_DEFAULT') )
{
    // The ': ' separator keeps the file path and the message from running together.
    error_log(__FILE__ . ': CProxy not loaded, php_spidermonkey extension not detected!');
    return;
}
 
/**
 * Escape a string so it can be embedded inside a quoted JavaScript string literal.
 *
 * A backslash is placed in front of backslashes, every character listed in
 * $quotes, and the characters '<' and '>'; line feed and carriage return are
 * written as the two-character sequences \n and \r.
 *
 * @param string $str    text to escape
 * @param string $quotes quote characters to escape (both ' and " by default)
 * @return string the escaped text
 */
function js($str, $quotes = '\'"')
{
    $escapeSet = '\\' . $quotes . "\n\r<>";

    return addcslashes($str, $escapeSet);
}
 
/**
 * Evaluates a proxy auto-config (PAC) script through the php_spidermonkey
 * JSContext and returns what the script's FindProxyForURL() function decides.
 *
 * The PAC helper functions a script may call (isInNet, shExpMatch, ...) are
 * the static methods of the PACFunctions trait; the constructor registers
 * each of them with the JavaScript context under its PAC name.
 */
class CProxy
{
    use PACFunctions; // PAC helper implementations, defined in the trait.

    private $pac, $js, $proxyconfig;

    /**
     * Create the JavaScript context and optionally load a PAC script.
     *
     * @param string|null $pacfile          path or URL of the PAC script; NULL to load one later with LoadPAC()
     * @param bool        $strictJavascript when true, adds JSOPTION_STRICT and JSOPTION_WERROR to the context options
     */
    public function __construct($pacfile=NULL, $strictJavascript=false)
    {
        // requires the php_spidermonkey extension.
        $js = new JSContext();

        if ( $strictJavascript )
        {
            $js->setOptions($js->getOptions() | JSOPTION_STRICT | JSOPTION_WERROR);
        }

        // Every PAC helper is exposed to the script under the same name as the
        // static method that implements it.
        $pacFunctions = array(
            'myIpAddress', 'isPlainHostName', 'dnsDomainIs', 'localHostOrDomainIs',
            'isResolvable', 'isInNet', 'dnsResolve', 'dnsDomainLevels',
            'shExpMatch', 'weekdayRange', 'dateRange', 'timeRange',
        );
        foreach ( $pacFunctions as $name )
        {
            $js->registerFunction(__NAMESPACE__ . '\CProxy::' . $name, $name);
        }

        // I don't know what ProxyConfig is used for:
        $this->proxyconfig = new \stdClass();
        $this->proxyconfig->bindings = array();
        $js->assign('ProxyConfig', $this->proxyconfig);

        $this->js = $js;
        $this->LoadPAC($pacfile);
    }

    /**
     * Ask the loaded PAC script which proxy to use for an address.
     *
     * @param string $address URL (or bare host/path) to look up
     * @param bool   $raw     when true, return the string produced by FindProxyForURL unchanged
     * @return mixed false when no PAC script is loaded, the address cannot be
     *               parsed or the script throws; a falsy script result is
     *               returned as-is; otherwise the raw result ($raw) or the
     *               array built by ReadPacResult()
     */
    public function FindProxyForAddress($address, $raw=false)
    {
        if ( !$this->pac )
        {
            return false;
        }

        $parts = parse_url($address);
        if ( $parts === false )
        {
            return false; // seriously malformed address
        }

        if ( !isset($parts['host']) )
        {
            /* $address is badly formed, not sure if this is right, but
             * use the path from the beginning to the first slash. */
            if ( !isset($parts['path']) )
            {
                return false;
            }
            $parts['host'] = preg_replace('/(.*?)(?>\/.*$|$)/', '$1', $parts['path']);
        }

        // The PAC 'host' argument is the host name only: the port is part of
        // the URL but must not be appended to the host, otherwise host-based
        // rules (dnsDomainIs, shExpMatch on host, ...) stop matching.
        $host = $parts['host'];

        $call = 'FindProxyForURL(\'' . js($address) . '\', \'' . js($host) . '\');';

        try
        {
            $rv = $this->js->evaluateScript($call, 'FindProxyForURL');
        }
        catch ( Exception $ex )
        {
            return false;
        }

        if ( !$rv || $raw )
        {
            return $rv;
        }
        return self::ReadPacResult($rv);
    }

    /**
     * Read a PAC script and evaluate it in the JavaScript context.
     *
     * @param string|null $pac path or URL readable by file_get_contents()
     * @return bool|null NULL when a script is already loaded, false when $pac
     *                   is NULL, the context is missing, the script cannot be
     *                   read (or is empty) or fails to evaluate, true on success
     */
    public function LoadPAC($pac)
    {
        if ( $this->pac )
        {
            return NULL; // already loaded
        }

        if ( $pac === NULL || !$this->js )
        {
            return false; // param error or not initialised properly
        }

        if ( !($script = file_get_contents($pac)) )
        {
            return false; // cannot read PAC file
        }

        try
        {
            $this->js->evaluateScript($script, $pac);
        }
        catch ( Exception $ex )
        {
            return false;
        }

        $this->pac = $script;
        return true;
    }

    /**
     * Split a FindProxyForURL result into its individual entries.
     *
     * @param string $pacres something like "PROXY 1.2.3.4:8080; PROXY 1.2.3.5:8080; DIRECT"
     * @return array|false preg_match_all() matches in PREG_SET_ORDER (entry,
     *                     keyword, remainder), or false when $pacres is empty
     *                     or contains no PROXY/SOCKS/DIRECT entry
     */
    public static function ReadPacResult($pacres)
    {
        if ( !$pacres )
        {
            return false;
        }

        if ( !preg_match_all('/(PROXY|SOCKS|DIRECT)\s*([^;]*|.*?$)/ui', $pacres, $matches, PREG_SET_ORDER) )
        {
            return false;
        }
        return $matches;
    }
}
?>

    PAC functions written as a trait class (remember - not tested very much!):

    Code formatted by http://hilite.me/
    <?php
    /* Define the PAC traits used in class CProxy, above. */

    namespace DEMO\PAC;
    use \DateTime, \DateTimeZone, \Exception;

    /**
     * PHP implementations of the helper functions a PAC (proxy auto-config)
     * script may call. All methods are static so they can be registered with a
     * JavaScript context as plain callables.
     */
    trait PACFunctions
    {
        private static $s_myIP; // cached result of myIpAddress()

        /**
         * Return this machine's IP address.
         *
         * Tries $_SERVER['SERVER_ADDR'] and $_SERVER['LOCAL_ADDR'] first and
         * returns the first value that is not 127.0.0.1; otherwise resolves the
         * local host name; otherwise falls back to 127.0.0.1 (if one of the
         * server variables held it) or '0.0.0.0'. The result is cached.
         *
         * @return string
         */
        static function myIpAddress()
        {
            if ( self::$s_myIP )
            {
                return self::$s_myIP;
            }

            $bestip = '0.0.0.0';

            foreach ( array('SERVER_ADDR', 'LOCAL_ADDR') as $var )
            {
                if ( empty($_SERVER[$var]) )
                {
                    continue;
                }
                $ip = $_SERVER[$var];
                if ( $ip != '127.0.0.1' )
                {
                    return (self::$s_myIP = $ip);
                }
                $bestip = $ip; // loopback: keep it only as a last resort
            }

            // gethostbyname() returns its argument unchanged when resolution fails.
            if ( ($hn = gethostname()) && ($ip = gethostbyname($hn)) && $ip !== $hn )
            {
                return (self::$s_myIP = $ip);
            }

            return (self::$s_myIP = $bestip);
        }

        /**
         * True when $host contains no dot (no domain part).
         */
        static function isPlainHostName( $host )
        {
            return strpos($host, '.') === false;
        }

        /**
         * Match $host against a shell-style pattern, case-insensitively.
         *
         * '*' matches any run of characters (including none) and '?' matches
         * exactly one character, wherever they appear in the pattern; every
         * other character is matched literally.
         *
         * @param string $host    text to test
         * @param string $pattern shell expression, e.g. "*.google.*"
         * @return bool
         */
        static function shExpMatch($host, $pattern)
        {
            // Quote everything, then turn the (now escaped) wildcards back
            // into their regular-expression equivalents.
            $regex = strtr(preg_quote($pattern, '/'), array('\\*' => '.*', '\\?' => '.'));

            return preg_match('/^' . $regex . '$/ui', $host) === 1;
        }

        /**
         * True when $host is an IPv4 address or resolves to one.
         */
        static function isResolvable($host)
        {
            if ( ip2long($host) !== false )
            {
                return true;
            }
            // On failure gethostbyname() returns $host itself, which ip2long() rejects.
            $resolved = gethostbyname($host);
            return $resolved && ip2long($resolved) !== false;
        }

        /**
         * True when $host (an IPv4 address or a resolvable name) lies in the
         * network described by $ip and $subnet.
         *
         * @param string $host   IPv4 address or host name
         * @param string $ip     network pattern in dotted notation
         * @param string $subnet network mask in dotted notation
         * @return bool false as well when any of the three cannot be parsed
         */
        static function isInNet($host, $ip, $subnet)
        {
            $longhost = ip2long($host);
            if ( $longhost === false )
            {
                $longhost = ip2long(gethostbyname($host));
                if ( $longhost === false )
                {
                    return false;
                }
            }

            $longip = ip2long($ip);
            $longsub = ip2long($subnet);
            if ( $longip === false || $longsub === false )
            {
                return false; // malformed pattern or mask must never match
            }

            // Mask both sides so a pattern with host bits set still works.
            return ($longhost & $longsub) === ($longip & $longsub);
        }

        /**
         * True when $host ends with $domain.
         */
        static function dnsDomainIs($host, $domain)
        {
            $len = strlen($domain);
            // Strict comparison: '==' would compare numeric-looking strings as numbers.
            return $len <= strlen($host) && substr($host, -$len) === (string)$domain;
        }

        /**
         * True when $host equals $domain, or when $host is an unqualified name
         * (no dot) and $domain starts with "$host." - e.g. "www" matches
         * "www.mozilla.org". The comparison is case-insensitive.
         */
        static function localHostOrDomainIs($host, $domain)
        {
            if ( strcasecmp($host, $domain) === 0 )
            {
                return true;
            }

            return strpos($host, '.') === false
                && stripos($domain, $host . '.') === 0;
        }

        /**
         * Resolve $host to an IPv4 address with gethostbyname(), which returns
         * $host unchanged when resolution fails.
         */
        static function dnsResolve($host)
        {
            return gethostbyname($host);
        }

        /**
         * Number of dots in $host.
         */
        static function dnsDomainLevels($host)
        {
            return substr_count($host, '.');
        }

        /**
         * True when today's weekday lies between $fr and $to (inclusive).
         *
         * Only the first three letters of a day name are used, in any case.
         * With a single day the test is for that day only; a range whose start
         * is later in the week than its end (FRI..MON) wraps over the weekend.
         *
         * @param string      $fr  first day, e.g. "MON"
         * @param string|null $to  last day, or "GMT" when only one day is given
         * @param string|null $gmt "GMT" to evaluate in UTC instead of local time
         * @return bool false as well when a day name is not recognised
         */
        static function weekdayRange($fr, $to=NULL, $gmt=NULL)
        {
            $dayNumbers = array('mon'=>1, 'tue'=>2, 'wed'=>3, 'thu'=>4, 'fri'=>5, 'sat'=>6, 'sun'=>7);

            $useGmt = self::pacIsGmt($gmt);
            $first = strtolower(substr($fr, 0, 3));

            if ( $to === NULL )
            {
                $last = $first;
            }
            else if ( self::pacIsGmt($to) )
            {
                // weekdayRange(day, "GMT"): the flag took the place of the second day.
                $last = $first;
                $useGmt = true;
            }
            else
            {
                $last = strtolower(substr($to, 0, 3));
            }

            $now = new DateTime('now', self::pacTimeZone($useGmt));
            $today = strtolower($now->format('D'));

            if ( !isset($dayNumbers[$first], $dayNumbers[$last], $dayNumbers[$today]) )
            {
                return false;
            }

            $first = $dayNumbers[$first];
            $last = $dayNumbers[$last];
            $today = $dayNumbers[$today];

            if ( $first > $last )
            {
                // e.g. FRI(5)..MON(1): wraps around the end of the week.
                return $today >= $first || $today <= $last;
            }

            return $today >= $first && $today <= $last;
        }

        /**
         * True when today's date matches a date or lies in a date range.
         *
         * The arguments are positional, as passed by the PAC script, so the
         * parameter names only describe the longest form. Accepted forms, each
         * optionally followed by "GMT":
         *   (day) (month) (year)
         *   (day1, day2) (month1, month2) (year1, year2)
         *   (day1, month1, day2, month2) (month1, year1, month2, year2)
         *   (day1, month1, year1, day2, month2, year2)
         * A number from 1 to 31 is a day, a larger number is a year and a
         * month is given by name (first three letters, any case). Ranges are
         * inclusive; a range without a year whose end is before its start
         * (DEC..FEB) wraps around the end of the year.
         *
         * @return bool false as well when the arguments fit none of the forms
         */
        static function dateRange($day1, $month1=NULL, $year1=NULL, $day2=NULL, $month2=NULL, $year2=NULL, $gmt=NULL)
        {
            $monthNumbers = array(
                'jan'=>1, 'feb'=>2, 'mar'=>3, 'apr'=>4, 'may'=>5, 'jun'=>6,
                'jul'=>7, 'aug'=>8, 'sep'=>9, 'oct'=>10, 'nov'=>11, 'dec'=>12,
            );

            // Step 1: classify every supplied argument as day, month or year.
            $values = array();
            $useGmt = false;
            foreach ( array($day1, $month1, $year1, $day2, $month2, $year2, $gmt) as $arg )
            {
                if ( $arg === NULL )
                {
                    break; // end of the supplied arguments
                }
                if ( $useGmt )
                {
                    return false; // nothing may follow the GMT flag
                }
                if ( self::pacIsGmt($arg) )
                {
                    $useGmt = true;
                    continue;
                }

                if ( is_numeric($arg) )
                {
                    $number = (int)$arg;
                    if ( $number < 1 )
                    {
                        return false;
                    }
                    $values[] = array($number > 31 ? 'y' : 'd', $number);
                }
                else if ( is_string($arg) && isset($monthNumbers[strtolower(substr($arg, 0, 3))]) )
                {
                    $values[] = array('m', $monthNumbers[strtolower(substr($arg, 0, 3))]);
                }
                else
                {
                    return false; // neither a number nor a month name
                }
            }

            if ( !$values )
            {
                return false;
            }

            // Step 2: group the values into one or two dates; within a date
            // the components always come in the order day, month, year.
            $dates = array();
            $count = count($values);
            $pos = 0;
            while ( $pos < $count )
            {
                if ( count($dates) === 2 )
                {
                    return false; // more values than two dates can hold
                }
                $date = array();
                foreach ( array('d', 'm', 'y') as $kind )
                {
                    if ( $pos < $count && $values[$pos][0] === $kind )
                    {
                        $date[$kind] = $values[$pos][1];
                        ++$pos;
                    }
                }
                if ( isset($date['d'], $date['y']) && !isset($date['m']) )
                {
                    return false; // a day and a year without a month is not a valid form
                }
                $dates[] = $date;
            }

            // Both ends of a range must specify the same components.
            if ( isset($dates[1]) && array_keys($dates[1]) !== array_keys($dates[0]) )
            {
                return false;
            }

            // Step 3: compare, looking only at the components that were given.
            $now = new DateTime('now', self::pacTimeZone($useGmt));
            $today = array(
                'd' => (int)$now->format('j'),
                'm' => (int)$now->format('n'),
                'y' => (int)$now->format('Y'),
            );

            $test = self::pacDateKey($today, $dates[0]);
            $lhs = self::pacDateKey($dates[0], $dates[0]);

            if ( !isset($dates[1]) )
            {
                return $test === $lhs;
            }

            $rhs = self::pacDateKey($dates[1], $dates[0]);
            if ( $rhs < $lhs && !isset($dates[0]['y']) )
            {
                return $test >= $lhs || $test <= $rhs;
            }
            return $test >= $lhs && $test <= $rhs;
        }

        /**
         * True when the current time matches an hour or lies in a time range.
         *
         * The arguments are positional, as passed by the PAC script. Accepted
         * forms, each optionally followed by "GMT":
         *   (hour) (hour1, hour2) (hour1, min1, hour2, min2)
         *   (hour1, min1, sec1, hour2, min2, sec2)
         * Ranges are inclusive at the precision given; a range whose end is
         * earlier than its start wraps over midnight.
         *
         * @return bool false as well when the argument count fits no form or a
         *              value is rejected by DateTime
         */
        static function timeRange($hour1, $min1=NULL, $sec1=NULL, $hour2=NULL, $min2=NULL, $sec2=NULL, $gmt=NULL)
        {
            $args = array($hour1, $min1, $sec1, $hour2, $min2, $sec2, $gmt);
            $total = count($args);

            // $ct = number of time values supplied before any NULL or 'GMT'.
            $ct = 0;
            while ( $ct < $total && $args[$ct] !== NULL && !self::pacIsGmt($args[$ct]) )
            {
                ++$ct;
            }

            if ( !$ct )
            {
                return false; // arg error
            }

            $useGmt = $ct < $total && self::pacIsGmt($args[$ct]);
            $now = new DateTime('now', self::pacTimeZone($useGmt));

            if ( $ct == 4 ) // range of hours+minutes
            {
                // The positional values are hour1, min1, hour2, min2.
                $min2 = $hour2;
                $hour2 = $sec1;
                $sec1 = $sec2 = 0;
                $fmt = 'Hi';
            }
            else if ( $ct < 3 ) // a single hour or a range of hours
            {
                $hour2 = ($ct == 2 ? $min1 : $hour1);
                $min1 = $sec1 = $min2 = $sec2 = 0;
                $fmt = 'H';
            }
            else if ( $ct == 6 )
            {
                $fmt = 'His';
            }
            else
            {
                return false; // parameters not specified properly
            }

            try
            {
                $dt = $now->format('Y-m-d');
                $lhs = new DateTime($dt . ' ' . (int)$hour1 . ':' . (int)$min1 . ':' . (int)$sec1, self::pacTimeZone($useGmt));
                $rhs = ( $ct == 1 )
                    ? NULL
                    : new DateTime($dt . ' ' . (int)$hour2 . ':' . (int)$min2 . ':' . (int)$sec2, self::pacTimeZone($useGmt));
            }
            catch ( Exception $ex )
            {
                // datetime not recognised, so parameter probably out of range
                return false;
            }

            $nowtime = $now->format($fmt);
            $ltime = $lhs->format($fmt);
            if ( $rhs === NULL )
            {
                return $nowtime == $ltime;
            }

            $rtime = $rhs->format($fmt);
            if ( $ltime > $rtime )
            {
                return $nowtime >= $ltime || $nowtime <= $rtime;
            }
            return $nowtime >= $ltime && $nowtime <= $rtime;
        }

        /**
         * True when $value is the string "GMT" in any letter case.
         */
        private static function pacIsGmt($value)
        {
            return is_string($value) && strcasecmp($value, 'GMT') === 0;
        }

        /**
         * Time zone used by the date/time helpers: UTC when the script asked
         * for GMT, otherwise the 'date.timezone' ini setting, otherwise PHP's
         * current default time zone.
         */
        private static function pacTimeZone($useGmt)
        {
            if ( $useGmt )
            {
                return new DateTimeZone('UTC');
            }

            $tzn = ini_get('date.timezone');
            if ( !$tzn )
            {
                $tzn = date_default_timezone_get();
            }
            return new DateTimeZone($tzn);
        }

        /**
         * Build a sortable integer (year*10000 + month*100 + day) from $date,
         * using only the components that are present in $shape.
         */
        private static function pacDateKey(array $date, array $shape)
        {
            $key = 0;
            if ( isset($shape['y']) )
            {
                $key += $date['y'] * 10000;
            }
            if ( isset($shape['m']) )
            {
                $key += $date['m'] * 100;
            }
            if ( isset($shape['d']) )
            {
                $key += $date['d'];
            }
            return $key;
        }
    }
    ?>