Current Path : /home/ncdcgo/ele.ncdc.go.ug/repository/url/ |
Current File : /home/ncdcgo/ele.ncdc.go.ug/repository/url/locallib.php |
<?php /** * Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the names of David R. Nadeau or NadeauSoftware.com, nor * the names of its contributors may be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ /* * This is a BSD License approved by the Open Source Initiative (OSI). * See: http://www.opensource.org/licenses/bsd-license.php */ defined('MOODLE_INTERNAL') || die(); /** * Combine a base URL and a relative URL to produce a new * absolute URL. The base URL is often the URL of a page, * and the relative URL is a URL embedded on that page. 
*
 * This function implements the "absolutize" algorithm from
 * the RFC3986 specification for URLs (section 5.2, "Relative Resolution").
 *
 * This function supports multi-byte characters with the UTF-8 encoding,
 * per the URL specification.
 *
 * Parameters:
 *  baseUrl      the absolute base URL (must have a scheme and a host).
 *
 *  relativeUrl  the relative URL to convert.
 *
 * Return values:
 *  An absolute URL that combines parts of the base and relative
 *  URLs, or FALSE if the base URL is not absolute or if either
 *  URL cannot be parsed.
 */
function url_to_absolute( $baseUrl, $relativeUrl ) {
    $r = split_url( $relativeUrl );
    if ( $r === false ) {
        return false;
    }
    // Defensive: normalise any non-array result to "no components".
    if ( !is_array( $r ) ) {
        $r = array();
    }

    // Compare against '' rather than using empty(), which would treat
    // the perfectly valid path "0" as missing.
    $hasPath = isset( $r['path'] ) && $r['path'] !== '';

    // If relative URL has a scheme, it is already absolute:
    // clean its path and return.
    if ( !empty( $r['scheme'] ) ) {
        if ( $hasPath && $r['path'][0] === '/' ) {
            $r['path'] = url_remove_dot_segments( $r['path'] );
        }
        return join_url( $r );
    }

    // Make sure the base URL is absolute.
    $b = split_url( $baseUrl );
    if ( !is_array( $b ) || empty( $b['scheme'] ) || empty( $b['host'] ) ) {
        return false;
    }
    $r['scheme'] = $b['scheme'];
    $basePath = isset( $b['path'] ) ? $b['path'] : '';

    // If relative URL has an authority ("//host/..."), only the scheme
    // is inherited: clean path and return.
    if ( isset( $r['host'] ) ) {
        if ( $hasPath ) {
            $r['path'] = url_remove_dot_segments( $r['path'] );
        }
        return join_url( $r );
    }
    unset( $r['port'], $r['user'], $r['pass'] );

    // Copy base authority.
    $r['host'] = $b['host'];
    foreach ( array( 'port', 'user', 'pass' ) as $key ) {
        if ( isset( $b[$key] ) ) {
            $r[$key] = $b[$key];
        }
    }

    // If relative URL has no path, use base path (and the base query,
    // unless the relative URL brings its own).
    if ( !$hasPath ) {
        if ( $basePath !== '' ) {
            $r['path'] = $basePath;
        }
        if ( !isset( $r['query'] ) && isset( $b['query'] ) ) {
            $r['query'] = $b['query'];
        }
        return join_url( $r );
    }

    // If relative URL path doesn't start with /, merge with base path:
    // everything in the base path before its last '/' is kept.
    if ( $r['path'][0] !== '/' ) {
        $base = core_text::strrchr( $basePath, '/', true );
        if ( $base === false ) {
            $base = '';
        }
        $r['path'] = $base . '/' . $r['path'];
    }
    $r['path'] = url_remove_dot_segments( $r['path'] );
    return join_url( $r );
}

/**
 * Filter out "." and ".." segments from a URL's path and return
 * the result.
 *
 * This function implements the "remove_dot_segments" algorithm from
 * the RFC3986 specification for URLs (section 5.2.4). It is meant for
 * absolute paths (starting with "/"), which is what url_to_absolute()
 * passes in. Empty segments ("//") are collapsed as well.
 *
 * A path that ends in "/", "/." or "/.." names a directory, so the
 * result keeps a trailing "/" in all three cases
 * (e.g. "/a/b/.." becomes "/a/", "/a/b/." becomes "/a/b/").
 *
 * This function supports multi-byte characters with the UTF-8 encoding,
 * per the URL specification.
 *
 * Parameters:
 *  path    the path to filter
 *
 * Return values:
 *  The filtered path with "." and ".." removed. An empty path is
 *  returned unchanged.
 */
function url_remove_dot_segments( $path ) {
    $path = (string) $path;
    if ( $path === '' ) {
        // Nothing to filter; also avoids reading $path[0] on an empty string.
        return '';
    }

    // '/' is a single byte in UTF-8 and never occurs inside a multi-byte
    // sequence, so a byte-wise explode is safe for UTF-8 paths.
    $inSegs  = explode( '/', $path );
    $outSegs = array();
    foreach ( $inSegs as $seg ) {
        if ( $seg === '' || $seg === '.' ) {
            continue;
        }
        if ( $seg === '..' ) {
            // Popping an empty stack is a no-op, so ".." never climbs above the root.
            array_pop( $outSegs );
        } else {
            $outSegs[] = $seg;
        }
    }

    $outPath = implode( '/', $outSegs );
    if ( $path[0] === '/' ) {
        $outPath = '/' . $outPath;
    }

    // The last input segment is '' when the path ends with '/'.
    $lastSeg     = $inSegs[count( $inSegs ) - 1];
    $isDirectory = ( $lastSeg === '' || $lastSeg === '.' || $lastSeg === '..' );
    if ( $isDirectory && $outPath !== '/' ) {
        $outPath .= '/';
    }
    return $outPath;
}

/**
 * This function parses an absolute or relative URL and splits it
 * into individual components.
 *
 * RFC3986 specifies the components of a Uniform Resource Identifier (URI).
 * A portion of the ABNFs are repeated here:
 *
 *  URI-reference   = URI
 *                  / relative-ref
 *
 *  URI             = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 *  relative-ref    = relative-part [ "?" query ] [ "#" fragment ]
 *
 *  hier-part       = "//" authority path-abempty
 *                  / path-absolute
 *                  / path-rootless
 *                  / path-empty
 *
 *  relative-part   = "//" authority path-abempty
 *                  / path-absolute
 *                  / path-noscheme
 *                  / path-empty
 *
 *  authority       = [ userinfo "@" ] host [ ":" port ]
 *
 * So, a URL has the following major components:
 *
 *  scheme
 *      The name of a method used to interpret the rest of
 *      the URL.  Examples:  "http", "https", "mailto", "file".
 *
 *  authority
 *      The name of the authority governing the URL's name
 *      space.
*      Examples:  "example.com", "user@example.com",
 *      "example.com:80", "user:password@example.com:80".
 *
 *      The authority may include a host name, port number,
 *      user name, and password.
 *
 *      The host may be a name, an IPv4 numeric address, or
 *      an IPv6 numeric address.
 *
 *  path
 *      The hierarchical path to the URL's resource.
 *      Examples:  "/index.htm", "/scripts/page.php".
 *
 *  query
 *      The data for a query.  Examples:  "?search=google.com".
 *
 *  fragment
 *      The name of a secondary resource relative to that named
 *      by the path.  Examples:  "#section1", "#header".
 *
 * An "absolute" URL must include a scheme and path.  The authority, query,
 * and fragment components are optional.
 *
 * A "relative" URL does not include a scheme and must include a path.  The
 * authority, query, and fragment components are optional.
 *
 * This function splits the $url argument into the following components
 * and returns them in an associative array.  Keys to that array include:
 *
 *  "scheme"    The scheme, such as "http" (always lower-cased).
 *  "host"      The host name, IPv4, or IPv6 address (IPv6 without brackets).
 *  "port"      The port number.
 *  "user"      The user name.
 *  "pass"      The user password.
 *  "path"      The path, such as a file path for "http".
 *  "query"     The query (without the leading "?").
 *  "fragment"  The fragment (without the leading "#").
 *
 * One or more of these may not be present, depending upon the URL.
 * An empty URL yields an empty array.
 *
 * Optionally, the "user", "pass", "host" (if a name, not an IP address),
 * "path", "query", and "fragment" may have percent-encoded characters
 * decoded.  The "scheme" and "port" cannot include percent-encoded
 * characters and are never decoded.  Decoding occurs after the URL has
 * been parsed.
 *
 * Parameters:
 *  url     the URL to parse.
 *
 *  decode  an optional boolean flag selecting whether
 *          to decode percent encoding or not.  Default = FALSE.
 *
 * Return values:
 *  the associative array of URL parts, or FALSE if the URL is
 *  too malformed to recognize any parts.
 */
function split_url( $url, $decode=FALSE) {
    // Character sets from RFC3986.
    $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar        = $xunressub . ':@% ';

    // Scheme from RFC3986. The '-' is escaped so that "+-." is not read
    // as a character range (which would also admit ',').
    $xscheme       = '([a-zA-Z][a-zA-Z\d+\-.]*)';

    // User info (user + password) from RFC3986.
    $xuserinfo     = '(([' . $xunressub . '%]*)' .
                     '(:([' . $xunressub . ':%]*))?)';

    // IPv4 from RFC3986 (without digit constraints).
    $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    // IPv6 from RFC2732 (without digit and grouping constraints).
    $xipv6         = '(\[([a-fA-F\d.:]+)\])';

    // Host name from RFC1035.  Technically, must start with a letter.
    // Relax that restriction to better parse URL structure, then
    // leave host name validation to application.
    $xhost_name    = '([a-zA-Z\d\-.%]+)';

    // Authority from RFC3986.  Skip IP future.
    $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport         = '(\d*)';
    $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                     '?(:' . $xport . ')?)';

    // Path from RFC3986.  Blend absolute & relative for efficiency.
    $xslash_seg    = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs     = '(/(' . $xpath_rel . ')?)';
    $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                     '|' . $xpath_rel . ')';

    // Query and fragment from RFC3986.
    $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

    // URL.
    $xurl          = '^(' . $xscheme . ':)?' . $xapath . '?' .
                     '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', $url, $m ) ) {
        return FALSE;
    }

    // preg_match() omits trailing groups that did not take part in the
    // match; pad them so every group below can be compared against ''.
    // (Comparing against '' instead of using empty() keeps values such
    // as a path or host of "0".)
    $m += array_fill( 0, 31, '' );

    $parts = array();

    if ( $m[2] !== '' ) {
        $parts['scheme'] = strtolower( $m[2] );
    }

    // Group 7 is "userinfo@", group 10 is ":password".
    if ( $m[7] !== '' ) {
        $parts['user'] = $m[9];
    }
    if ( $m[10] !== '' ) {
        $parts['pass'] = $m[11];
    }

    // Groups 13/14/16 are host name, IPv4 and bracket-less IPv6; group 5
    // is the whole "//authority", which may legitimately have no host.
    $hostIsName = false;
    if ( $m[13] !== '' ) {
        $parts['host'] = $m[13];
        $hostIsName = true;
    } else if ( $m[14] !== '' ) {
        $parts['host'] = $m[14];
    } else if ( $m[16] !== '' ) {
        $parts['host'] = $m[16];
    } else if ( $m[5] !== '' ) {
        $parts['host'] = '';
    }

    // Group 17 is ":port"; the port itself may be empty.
    if ( $m[17] !== '' ) {
        $parts['port'] = $m[18];
    }

    // Path after an authority, absolute path, or relative path.
    if ( $m[19] !== '' ) {
        $parts['path'] = $m[19];
    } else if ( $m[21] !== '' ) {
        $parts['path'] = $m[21];
    } else if ( $m[25] !== '' ) {
        $parts['path'] = $m[25];
    }

    // Groups 27/29 include the leading '?' / '#', so an empty query or
    // fragment is still reported (as '').
    if ( $m[27] !== '' ) {
        $parts['query'] = $m[28];
    }
    if ( $m[29] !== '' ) {
        $parts['fragment'] = $m[30];
    }

    if ( !$decode ) {
        return $parts;
    }

    foreach ( array( 'user', 'pass', 'path', 'query', 'fragment' ) as $key ) {
        if ( isset( $parts[$key] ) ) {
            $parts[$key] = rawurldecode( $parts[$key] );
        }
    }
    // Only hosts matched by the host-name alternative are decoded.
    if ( $hostIsName ) {
        $parts['host'] = rawurldecode( $parts['host'] );
    }
    return $parts;
}

/**
 * This function joins together URL components to form a complete URL.
 *
 * RFC3986 specifies the components of a Uniform Resource Identifier (URI).
 * This function implements the specification's "component recomposition"
 * algorithm for combining URI components into a full URI string.
 *
 * The $parts argument is an associative array containing zero or
 * more of the following:
 *
 *  "scheme"    The scheme, such as "http".
 *  "host"      The host name, IPv4, or IPv6 address.
 *  "port"      The port number.
 *  "user"      The user name.
 *  "pass"      The user password.
 *  "path"      The path, such as a file path for "http".
 *  "query"     The query.
 *  "fragment"  The fragment.
 *
 * The "port", "user", and "pass" values are only used when a "host"
 * is present.
*
 * The optional $encode argument indicates if appropriate URL components
 * should be percent-encoded as they are assembled into the URL.  Encoding
 * is only applied to the "user", "pass", "host" (if a host name, not an
 * IP address), "path", "query", and "fragment" components.  The "scheme"
 * and "port" are never encoded.  Slashes in the "path" are left alone,
 * so each segment of the hierarchy is encoded separately.
 *
 * The assembled URL string is returned.
 *
 * Parameters:
 *  parts   an associative array of strings containing the
 *          individual parts of a URL.
 *
 *  encode  an optional boolean flag selecting whether
 *          to do percent encoding or not.  Default = FALSE.
 *
 * Return values:
 *  Returns the assembled URL string.  The string is an absolute
 *  URL if a scheme is supplied, and a relative URL if not.  An
 *  empty string is returned if the $parts array does not contain
 *  any of the needed values.
 */
function join_url( $parts, $encode=FALSE) {
    // Work on a string copy of the path; comparing with '' (instead of
    // using empty()) keeps a path of "0".
    $path = isset( $parts['path'] ) ? (string) $parts['path'] : '';

    if ( $encode ) {
        if ( isset( $parts['user'] ) ) {
            $parts['user'] = rawurlencode( $parts['user'] );
        }
        if ( isset( $parts['pass'] ) ) {
            $parts['pass'] = rawurlencode( $parts['pass'] );
        }
        // Hosts that consist entirely of IP-address characters
        // (optionally in brackets) are left alone; every other host is
        // treated as a name and encoded. The whole alternation is
        // anchored so a name that merely ends in hex digits or dots is
        // not mistaken for an address.
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'] ) ) {
            $parts['host'] = rawurlencode( $parts['host'] );
        }
        if ( $path !== '' ) {
            // rawurlencode() emits upper-case hex, so a plain replace
            // is enough to restore the segment separators.
            $path = str_replace( '%2F', '/', rawurlencode( $path ) );
        }
        if ( isset( $parts['query'] ) ) {
            $parts['query'] = rawurlencode( $parts['query'] );
        }
        if ( isset( $parts['fragment'] ) ) {
            $parts['fragment'] = rawurlencode( $parts['fragment'] );
        }
    }

    $url = '';
    if ( !empty( $parts['scheme'] ) ) {
        $url .= $parts['scheme'] . ':';
    }

    if ( isset( $parts['host'] ) ) {
        $url .= '//';
        if ( isset( $parts['user'] ) ) {
            $url .= $parts['user'];
            if ( isset( $parts['pass'] ) ) {
                $url .= ':' . $parts['pass'];
            }
            $url .= '@';
        }
        if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) ) {
            $url .= '[' . $parts['host'] . ']';     // IPv6
        } else {
            $url .= $parts['host'];                 // IPv4 or name
        }
        if ( isset( $parts['port'] ) ) {
            $url .= ':' . $parts['port'];
        }
        // A path that follows an authority must start with a slash.
        if ( $path !== '' && $path[0] !== '/' ) {
            $url .= '/';
        }
    }

    $url .= $path;
    if ( isset( $parts['query'] ) ) {
        $url .= '?' . $parts['query'];
    }
    if ( isset( $parts['fragment'] ) ) {
        $url .= '#' . $parts['fragment'];
    }
    return $url;
}

/**
 * This function encodes URL to form a URL which is properly
 * percent encoded to replace disallowed characters.
 *
 * RFC3986 specifies the allowed characters in the URL as well as
 * reserved characters in the URL. This function replaces all the
 * disallowed characters in the URL with their respective percent
 * encodings. Already encoded characters are not encoded again,
 * such as '%20' is not encoded to '%2520'.
 *
 * Parameters:
 *  url     the url to encode.
 *
 * Return values:
 *  Returns the encoded URL string.
 */
function encode_url($url) {
    // Percent-encoded forms of the RFC3986 reserved characters and of
    // '%' itself, paired by position with the literal characters below.
    // '%25' must stay last: restoring '%' earlier would turn a
    // double-encoded sequence such as "%253A" into "%3A" and then
    // wrongly into ":".
    $encoded = array(
        '%3A', '%2F', '%3F', '%23', '%5B', '%5D', '%40',
        '%21', '%24', '%26', '%27', '%28', '%29', '%2A',
        '%2B', '%2C', '%3B', '%3D', '%25',
    );
    $literal = array(
        ':', '/', '?', '#', '[', ']', '@',
        '!', '$', '&', "'", '(', ')', '*',
        '+', ',', ';', '=', '%',
    );

    // Encode everything, then restore the reserved characters in order.
    return str_ireplace( $encoded, $literal, rawurlencode( $url ) );
}

/**
 * Extract URLs from a web page.
 *
 * URLs are extracted from a long list of tags and attributes as defined
 * by the HTML 2.0, HTML 3.2, HTML 4.01, and draft HTML 5.0 specifications.
 * URLs are also extracted from tags and attributes that are common
 * extensions of HTML, from the draft Forms 2.0 specification, from XHTML,
 * and from WML 1.3 and 2.0.
 *
 * The function returns an associative array of associative arrays of
 * arrays of URLs.  The outermost array's keys are the tag (element) name,
 * such as "a" for <a> or "img" for <img>.
* The values for these entries
 * are associative arrays where the keys are attribute names for those
 * tags, such as "href" for <a href="...">.  Finally, the values for
 * those arrays are URLs found in those tags and attributes throughout
 * the text.
 *
 * Besides the element names, two further top-level keys may appear:
 * "meta" (with the single attribute key "http-equiv") for URLs found in
 * <meta http-equiv="..."> elements, and "style" for URLs found in style
 * attributes and <style> bodies, grouped as returned by extract_css_urls().
 *
 * The "archive" attribute holds a list of URLs: space-separated on
 * <object> and comma-separated on <applet>.  Each URL of such a list is
 * returned as its own entry.
 *
 * Parameters:
 *  text        the UTF-8 text to scan
 *
 * Return values:
 *  an associative array where keys are tags and values are an
 *  associative array where keys are attributes and values are
 *  an array of URLs.
 *
 * See:
 *  http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_web_page
 */
function extract_html_urls( $text ) {
    // (element, attribute) pairs to scan, in reporting order.
    $match_elements = array(
        // HTML 2.0
        array( 'a', 'href' ),
        array( 'a', 'urn' ),
        array( 'base', 'href' ),
        array( 'form', 'action' ),
        array( 'img', 'src' ),
        array( 'link', 'href' ),

        // HTML 3.2
        array( 'applet', 'code' ),
        array( 'applet', 'codebase' ),
        array( 'area', 'href' ),
        array( 'body', 'background' ),
        array( 'img', 'usemap' ),
        array( 'input', 'src' ),

        // HTML 4.01
        array( 'applet', 'archive' ),
        array( 'applet', 'object' ),
        array( 'blockquote', 'cite' ),
        array( 'del', 'cite' ),
        array( 'frame', 'longdesc' ),
        array( 'frame', 'src' ),
        array( 'head', 'profile' ),
        array( 'iframe', 'longdesc' ),
        array( 'iframe', 'src' ),
        array( 'img', 'longdesc' ),
        array( 'input', 'usemap' ),
        array( 'ins', 'cite' ),
        array( 'object', 'archive' ),
        array( 'object', 'classid' ),
        array( 'object', 'codebase' ),
        array( 'object', 'data' ),
        array( 'object', 'usemap' ),
        array( 'q', 'cite' ),
        array( 'script', 'src' ),

        // HTML 5.0 (draft)
        array( 'audio', 'src' ),
        array( 'command', 'icon' ),
        array( 'embed', 'src' ),
        array( 'event-source', 'src' ),
        array( 'html', 'manifest' ),
        array( 'source', 'src' ),
        array( 'video', 'src' ),
        array( 'video', 'poster' ),

        // Common extensions
        array( 'bgsound', 'src' ),
        array( 'body', 'credits' ),
        array( 'body', 'instructions' ),
        array( 'body', 'logo' ),
        array( 'div', 'href' ),
        array( 'div', 'src' ),
        array( 'embed', 'code' ),
        array( 'embed', 'pluginspage' ),
        array( 'html', 'background' ),
        array( 'ilayer', 'src' ),
        array( 'img', 'dynsrc' ),
        array( 'img', 'lowsrc' ),
        array( 'input', 'dynsrc' ),
        array( 'input', 'lowsrc' ),
        array( 'table', 'background' ),
        array( 'td', 'background' ),
        array( 'th', 'background' ),
        array( 'layer', 'src' ),
        array( 'xml', 'src' ),

        // Forms 2.0
        array( 'button', 'action' ),
        array( 'datalist', 'data' ),
        array( 'form', 'data' ),
        array( 'input', 'action' ),
        array( 'select', 'data' ),

        // XHTML
        array( 'html', 'xmlns' ),

        // WML 1.3
        array( 'access', 'path' ),
        array( 'card', 'onenterforward' ),
        array( 'card', 'onenterbackward' ),
        array( 'card', 'ontimer' ),
        array( 'go', 'href' ),
        array( 'option', 'onpick' ),
        array( 'template', 'onenterforward' ),
        array( 'template', 'onenterbackward' ),
        array( 'template', 'ontimer' ),

        // WML 2.0
        array( 'wml', 'xmlns' ),
    );

    // http-equiv values of <meta> elements whose content holds a URL.
    $match_metas = array(
        'content-base',
        'content-location',
        'referer',
        'location',
        'refresh',
    );

    // Extract all elements (the text between '<' and '>' of opening tags).
    if ( !preg_match_all( '/<([a-z][^>]*)>/iu', $text, $matches ) ) {
        return array( );
    }
    $elements = $matches[1];

    // Attribute value: group 3 is a double-quoted value, group 4 an
    // unquoted one.
    $value_pattern = '=(("([^"]*)")|([^\s]*))';

    // Pick the attribute value out of a $value_pattern match. Either
    // group may be absent from the match array, so both are checked.
    $value_of = function ( $match ) {
        if ( isset( $match[3] ) && $match[3] !== '' ) {
            return $match[3];
        }
        if ( isset( $match[4] ) && $match[4] !== '' ) {
            return $match[4];
        }
        return '';
    };

    $urls = array( );

    // Match elements and attributes
    foreach ( $match_elements as $match_element ) {
        list( $name, $attr ) = $match_element;

        // The attribute name must directly follow whitespace, so that
        // e.g. "data-src" is not taken for "src". The 's' flag lets the
        // scan cross line breaks inside a tag.
        $pattern = '/^' . $name . '\s(?:.*\s)?' . $attr . $value_pattern . '/isu';

        // Only "archive" holds a URL list; its separator depends on
        // the element.
        $split_pattern = null;                  // Single URL
        if ( $attr === 'archive' ) {
            if ( $name === 'object' ) {
                $split_pattern = '/\s+/u';      // Space-separated URL list
            } else {
                $split_pattern = '/\s*,\s*/u';  // Comma-separated URL list
            }
        }

        foreach ( $elements as $element ) {
            if ( !preg_match( $pattern, $element, $match ) ) {
                continue;
            }
            $value = $value_of( $match );
            if ( $split_pattern === null ) {
                $urls[$name][$attr][] = $value;
                continue;
            }
            $list = preg_split( $split_pattern, trim( $value ), -1, PREG_SPLIT_NO_EMPTY );
            foreach ( $list as $item ) {
                $urls[$name][$attr][] = $item;
            }
        }
    }

    // Match meta http-equiv elements
    $content_pattern = '/content' . $value_pattern . '/iu';
    // "refresh" content looks like "5; url=http://..."; group 2 is the URL.
    $refresh_pattern = '/\d*;\s*(url=)?(.*)$/iu';
    foreach ( $match_metas as $match_meta ) {
        $attr_pattern = '/http-equiv="?' . $match_meta . '"?/iu';
        foreach ( $elements as $element ) {
            if ( !preg_match( '/^meta/iu', $element ) ||
                !preg_match( $attr_pattern, $element ) ||
                !preg_match( $content_pattern, $element, $match ) ) {
                continue;
            }
            $value = $value_of( $match );
            if ( $match_meta != 'refresh' ) {
                $urls['meta']['http-equiv'][] = $value;
            } else if ( preg_match( $refresh_pattern, $value, $refresh ) ) {
                $urls['meta']['http-equiv'][] = $refresh[2];
            }
        }
    }

    // Match style attributes
    $style_found   = array( );
    $style_pattern = '/style' . $value_pattern . '/iu';
    foreach ( $elements as $element ) {
        if ( !preg_match( $style_pattern, $element, $match ) ) {
            continue;
        }
        $style_urls = extract_css_urls( $value_of( $match ) );
        if ( !empty( $style_urls ) ) {
            $style_found = array_merge_recursive( $style_found, $style_urls );
        }
    }

    // Match style bodies
    if ( preg_match_all( '/<style[^>]*>(.*?)<\/style>/siu', $text, $style_bodies ) ) {
        foreach ( $style_bodies[1] as $style_body ) {
            $style_urls = extract_css_urls( $style_body );
            if ( !empty( $style_urls ) ) {
                $style_found = array_merge_recursive( $style_found, $style_urls );
            }
        }
    }

    // Report the "style" key only when CSS URLs were actually found.
    if ( !empty( $style_found ) ) {
        $urls['style'] = $style_found;
    }

    return $urls;
}

/**
 * Extract URLs from UTF-8 CSS text.
 *
 * URLs within @import statements and url() property functions are extracted
 * and returned in an associative array of arrays.  Array keys indicate
 * the use context for the URL, including:
 *
 *  "import"
 *  "property"
 *
 * Each value in the associative array is an array of URLs.
*
 * A key is only present when at least one URL of that kind was found.
 * Backslash escapes inside a URL (such as "\)") are resolved to the
 * escaped character.
 *
 * Parameters:
 *  text        the UTF-8 text to scan
 *
 * Return values:
 *  an associative array of arrays of URLs.
 *
 * See:
 *  http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_css_file
 */
function extract_css_urls( $text ) {
    $urls = array( );

    // A URL is a run of characters other than backslash, quotes, comma,
    // space and parentheses, mixed with backslash-escaped characters.
    // The quantifiers are possessive: none of the consumed characters
    // can start what follows a URL (a quote or ')'), so backtracking
    // into the run could never help, and skipping it keeps matching
    // linear even on an unterminated "url(".
    $url_pattern     = '((?:[^\\\\\'", \(\)]++|\\\\.)*+)';
    $urlfunc_pattern = 'url\(\s*[\'"]?' . $url_pattern . '[\'"]?\s*\)';

    // Group 3: @import "..."  Group 5: @import url(...)  Group 7: url(...)
    $pattern = '/(' .
        '(@import\s*[\'"]' . $url_pattern . '[\'"])' .
        '|(@import\s*' . $urlfunc_pattern . ')' .
        '|(' . $urlfunc_pattern . ')' .
        ')/iu';
    if ( !preg_match_all( $pattern, $text, $matches ) ) {
        return $urls;
    }

    // Result key for each capture group, in reporting order: quoted
    // imports first, then url() imports, then property url()s.
    $groups = array(
        3 => 'import',      // @import '...' and @import "..."
        5 => 'import',      // @import url(...), url('...'), url("...")
        7 => 'property',    // url(...), url('...'), url("...")
    );
    foreach ( $groups as $index => $key ) {
        foreach ( $matches[$index] as $match ) {
            // Groups that did not take part in a match are reported as ''.
            if ( $match === '' ) {
                continue;
            }
            // Resolve backslash escapes.
            $urls[$key][] = preg_replace( '/\\\\(.)/u', '\\1', $match );
        }
    }

    return $urls;
}