<?php
/**
 * Checks whether a string has the shape of a plain http:// URL.
 *
 * Accepted shape: "http://", a host name that ends in one or more suffixes of
 * 2-4 letters, optional sub directories, then either a file name or an
 * optional trailing slash, optionally followed by "?" and a query string made
 * of key=value pairs. Anything else (other schemes such as https or ftp,
 * ports, fragments, ...) does not match the pattern and is rejected.
 *
 * @author Marten van Urk
 *
 * @param string $url_val The candidate URL.
 * @return bool True when $url_val matches the accepted shape, false otherwise.
 */
function check_url($url_val) {
    $url_pattern  = 'http\:\/\/[[:alnum:]\-\.]+(\.[[:alpha:]]{2,4})+';
    $url_pattern .= '(\/[\w\-]+)*'; // sub directories and rewrite URLs: /val_1/45
    $url_pattern .= '((\/[\w\-\.]+\.[[:alnum:]]{2,4})?'; // file name: /index.html
    $url_pattern .= '|'; // end with a file name, or ...
    $url_pattern .= '\/?)'; // ... with or without a trailing slash

    // \z (not $) is used as the end anchor: $ also matches just before a
    // trailing newline, which would let "http://host.nl\n" pass validation.
    $location_regex = '/^' . $url_pattern . '\z/';
    $query_regex    = '/^(&?[\w\-]+=\w*)+\z/';

    // Split on the first "?" only, so that everything after it is validated
    // as the query string; a second "?" can no longer hide unchecked input.
    $url_parts = explode("?", $url_val, 2);

    if (!preg_match($location_regex, $url_parts[0])) {
        return false;
    }

    // A "?" was present: the part behind it must be a non-empty list of
    // key=value pairs.
    if (isset($url_parts[1]) && !preg_match($query_regex, $url_parts[1])) {
        return false;
    }

    return true;
}

/**
 * Turns relative link and image addresses in a piece of HTML into absolute
 * ones by prefixing them with a base URL.
 *
 * Only `<a ... href="...">` and `<img ... src="...">` written in lower case,
 * with a space before the attribute and double quotes around the value, are
 * rewritten. Addresses that already carry a URI scheme (http:, https:, ftp:,
 * mailto:, javascript:, ...) or that are protocol-relative ("//host/...")
 * are left untouched. Every other value, including one that starts with a
 * single "/", simply gets $base put in front of it.
 *
 * @author Marten van Urk
 *
 * @param string $text HTML fragment to process.
 * @param string $base Base URL; a trailing "/" is appended when missing.
 * @return string $text with the matching addresses prefixed by $base, or
 *                $text unchanged when $base is empty.
 */
function absolute($text, $base) {
    if (empty($base)) {
        return $text;
    }

    // The base URL needs a trailing slash before a relative path is appended.
    if (substr($base, -1, 1) != "/") {
        $base .= "/";
    }

    // "\" and "$" are special inside a preg_replace() replacement string
    // (\1, $1, ${1}); escape them so the base URL is always inserted verbatim.
    $safe_base = strtr($base, array('\\' => '\\\\', '$' => '\\$'));

    // Negative lookahead: skip values that start with "<scheme>:" or "//".
    $not_absolute = '(?![A-Za-z][A-Za-z0-9+.\-]*:|\/\/)';

    // tag name => attribute that holds the address
    $targets = array('a' => 'href', 'img' => 'src');

    foreach ($targets as $tag => $attribute) {
        $pattern = '/<' . $tag . '([^>]*) ' . $attribute . '="' . $not_absolute . '([^"]*)"/';
        $replace = '<' . $tag . '${1} ' . $attribute . '="' . $safe_base . '${2}"';
        $text = preg_replace($pattern, $replace, $text);
    }

    return $text;
}

/**
 * Downloads the document behind a URL and rewrites its relative links and
 * images so that they point back to the site it was fetched from.
 *
 * Every backslash in the downloaded content is replaced by a forward slash
 * before the addresses are rewritten by absolute().
 *
 * The script is terminated with die() when $url is rejected by check_url().
 *
 * @author Marten van Urk
 *
 * @param string $url Address of the document to fetch; must pass check_url().
 * @return string The rewritten content, or an empty string when the download
 *                fails.
 */
function getExternHtml($url) {
    // Make sure $url really is a URL before fetching anything.
    if (!check_url($url)) {
        die('Geen geldige url');
    }

    $stream = file_get_contents($url);
    if ($stream === false) {
        // The download failed; return an empty result explicitly instead of
        // pushing boolean false through the string functions below.
        return '';
    }
    $stream = str_replace('\\', '/', $stream);

    // The base is the "directory" part of the URL. A query string is not
    // part of the path, so cut it off first.
    $location = $url;
    $query_start = strpos($location, '?');
    if ($query_start !== false) {
        $location = substr($location, 0, $query_start);
    }

    // Look for a path slash behind "scheme://". Without one (for example
    // "http://example.com") the last "/" in the URL belongs to "://", so the
    // whole location is the base directory.
    $scheme_end = strpos($location, '://');
    $path_offset = ($scheme_end === false) ? 0 : $scheme_end + 3;
    if (strpos($location, '/', $path_offset) === false) {
        $base = $location . '/';
    } else {
        $base = substr($location, 0, strrpos($location, '/') + 1);
    }

    // Convert links from relative to absolute.
    return absolute($stream, $base);
}
?>