<?php

// ======================================================= // 
// Spiegel.de RDF Feed - SpiegelOnline Schlagzeilen in RDF // 
//                                                         // 
// written by Henrik Teichmann <t@yaos.de> & <www.yaos.de> // 
//                                                         // 
// Thanks to yah for recode_charset()                      // 
// and nils for regex help                                 // 
//                                                         // 
// The given Content is Copyrighted by Spiegel.De Online   // 
//                                                         // 
// ======================================================= // 


// Load the raw headline page into $file (an array of lines).
//
// The page is cached in ./data.txt and re-downloaded once the cache is
// older than 90 minutes. Freshness is judged by the file's modification
// time: the access time is refreshed by every read of the cache (or never
// updated at all on "noatime" mounts), so it cannot be used for expiry.
$cacheFile = "./data.txt";
$cacheLifetime = 5400; // seconds
$cacheExists = is_file($cacheFile);
$cacheIsFresh = $cacheExists && (filemtime($cacheFile) + $cacheLifetime >= time());

$file = false;

if(!$cacheIsFresh) {
  $file = file("http://www.spiegel.de/schlagzeilen/");
  if(is_array($file) && count($file) > 0) {
    // Only overwrite the cache with a successful download.
    $pointer = fopen($cacheFile, 'w');
    if($pointer !== false) {
      fwrite($pointer, implode("", $file));
      fclose($pointer);
    }
  } else {
    $file = false;
  }
}

if($file === false) {
  // Cache is fresh, or the download failed: use the cached copy if there
  // is one (even a stale one), otherwise continue with no input at all.
  $file = $cacheExists ? file($cacheFile) : false;
  if(!is_array($file)) {
    $file = Array();
  }
}

// Collected headlines. Each entry is an array with the keys
// "title", "url", "categorie" and "date" (a Unix timestamp).
$News = Array();

// Parser state: "on" while reading inside the <div class="markvisited">
// container that holds the headlines, "off" otherwise.
$parse = "off";

// Date of the section currently being read. Falls back to today until the
// first date heading (a line starting with <p>) has been parsed.
$day = date("d");
$month = date("m");
$year = date("Y");

// Tolerate a missing or failed download: $file may be unset or not an array.
if(!isset($file) || !is_array($file)) {
  $file = Array();
}

foreach($file as $line) {
  if(preg_match("/<div class=\"markvisited\">/i", $line)) {
    $parse = "on";
  }

  // Section heading carrying the date (weekday, day, month, year)
  if($parse == "on" && preg_match("/^<p>/i", $line)) {
    $line = preg_replace("/<(.*?)>/", "", $line);
    $line = trim($line);
    list($timestamp, $day, $month, $year) = ParseDate($line);
  }

  // A headline: one anchor line holding title, category/time and link
  if($parse == "on" && preg_match("/^<a/i", $line)) {
     // Title
     $title = "";
     if(preg_match("/<font color=\"black\">(.*?)<\/font>/", $line, $match)) {
       $title = trim($match[1]);
     }
     $title = strip_tags($title);
     $title = recode_charset($title, "HTML", "ASCII");
     $title = recode_charset($title, "ASCII", "UNICODE");

     // Category and time of the headline, e.g. taken from the grey
     // "(category, hh:mm)" annotation: strip "(", "," and ")" and split
     // the first word (category) from the rest.
     $line_date = "";
     if(preg_match("/<font color=\"#777777\".*>(.*?)<\/font>/", $line, $match)) {
       $line_date = trim($match[1]);
     }
     $line_date = str_replace(Array("(", ",", ")"), "", $line_date);
     list($categorie, $line_date) = array_pad(explode(" ", $line_date, 2), 2, "");
     $categorie = trim($categorie);

     // NOTE(review): the pattern requires whitespace in front of the time;
     // if the remainder is just "hh:mm" it will not match and the entry is
     // stamped with midnight - confirm against the real page markup.
     $hour = 0;
     $minute = 0;
     if(preg_match("/\s(\d{2})\:(\d{2})/", $line_date, $zeit)) {
       $hour = (int)$zeit[1];
       $minute = (int)$zeit[2];
     }
     $line_date = mktime($hour, $minute, 0, (int)$month, (int)$day, (int)$year);

     // URL (relative links are made absolute)
     $url = "";
     if(preg_match("/href=\"(.*?)\"/", $line, $match)) {
       $url = trim($match[1]);
     }
     if(!preg_match("/^http/i", $url)) {
       $url = "http://www.spiegel.de".$url;
     }
     $url = recode_charset($url, "HTML", "ASCII");
     $url = recode_charset($url, "ASCII", "UNICODE");

     // Store everything as one entry
     $News[] = Array(
                      "title" => $title,
                      "url" => $url,
                      "categorie" => $categorie,
                      "date" => $line_date
                     );
  }

  // End of the headline container
  if($parse == "on" && preg_match("/<\/div>/i", $line)) {
    $parse = "off";
  }
}

// Emit the collected headlines as an RDF document
Header("Content-Type: text/xml");
print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n";
print "<rdf:RDF\n";
print "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n";
print "xmlns=\"http://my.netscape.com/rdf/simple/0.9/\">\n\n";

foreach($News as $item) {
  // Entries without a title or a URL are skipped.
  if(!empty($item["title"]) && !empty($item["url"])) {
    // Fields are read straight from the entry; the timestamp lives under
    // the "date" key.
    print "<item>\n";
    print "  <title>".$item["title"]."</title>\n";
    print "  <url>".$item["url"]."</url>\n";
    print "  <categorie>".$item["categorie"]."</categorie>\n";
    print "  <date>".$item["date"]."</date>\n";
    print "</item>\n\n";
  }
}

print "</rdf:RDF>\n";

// ########################
// Helper functions
// ########################

// Parses a date heading of the form "<weekday>, <day>. <month name> <year>"
// (e.g. "Dienstag, 13. Januar 2004").
//
// Returns Array(timestamp, day, month, year): timestamp is midnight of that
// day as produced by mktime(); day and year are the strings found in the
// heading (day without its trailing dot); month is whatever FormatMonth()
// returns for the month name (NULL if it does not know the name).
// Parts that are missing from the input come back as empty strings.
function ParseDate($date) {
  // Everything after the first comma is the date; the weekday is dropped.
  $parts = explode(",", (string)$date);
  $rest = isset($parts[1]) ? trim($parts[1]) : "";

  // Split on any run of whitespace so repeated spaces do not shift fields.
  $tokens = preg_split("/\s+/", $rest, -1, PREG_SPLIT_NO_EMPTY);
  if(!is_array($tokens)) {
    $tokens = Array();
  }
  list($day, $month, $year) = array_pad($tokens, 3, "");

  $day = str_replace(".", "", $day);
  $month = FormatMonth($month);
  return Array(mktime(0, 0, 0, (int)$month, (int)$day, (int)$year), $day, $month, $year);
}

// Translates a German month name into its two-digit month number ("01".."12").
//
// The comparison is case-insensitive. March is only recognised in its
// HTML-entity spelling "m&auml;rz". Any other input yields NULL.
function FormatMonth($month) {
  $numbers = array(
    "januar"     => "01",
    "februar"    => "02",
    "m&auml;rz"  => "03",
    "april"      => "04",
    "mai"        => "05",
    "juni"       => "06",
    "juli"       => "07",
    "august"     => "08",
    "september"  => "09",
    "oktober"    => "10",
    "november"   => "11",
    "dezember"   => "12"
  );

  $name = strtolower($month);
  if(isset($numbers[$name])) {
    return $numbers[$name];
  }
  return null;
}

// Re-encodes "&" and the characters with code points 161-255 between three
// notations:
//   "ASCII"   - the literal character
//   "UNICODE" - numeric character reference ("&#228;", and "&#x26;" for "&")
//   "HTML"    - named HTML entity ("&auml;", "&amp;")
//
// $string   text to convert
// $cs_from  notation used in $string ("ASCII", "UNICODE" or "HTML")
// $cs_to    notation to convert to   ("ASCII", "UNICODE" or "HTML")
//
// Returns the converted string. The conversion is done in a single pass, so
// text produced by one replacement is never replaced again (e.g. HTML
// "&amp;auml;" becomes "&auml;", not the a-umlaut character). If either
// notation name is unknown, a warning is raised and $string is returned
// unchanged.
function recode_charset($string,$cs_from,$cs_to) {
  $trans_tbl = array();

  // The three tables are parallel: entries at the same index denote the
  // same character. The entry after "¬" is a soft hyphen (code point 173).
  $trans_tbl["ASCII"] = array("&", "¡", "¢", "£", "¤", "¥", "¦", "§", "¨", "©", "ª", "«", "¬", "­", "®", "¯", "°", "±", "²", "³", "´", "µ", "¶", "·", "¸", "¹", "º", "»", "¼", "½", "¾", "¿", "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë", "Ì", "Í", "Î", "Ï", "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "×", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "÷", "ø", "ù", "ú", "û", "ü", "ý", "þ", "ÿ");

  $trans_tbl["UNICODE"] = array("&#x26;", "&#161;", "&#162;", "&#163;", "&#164;", "&#165;", "&#166;", "&#167;", "&#168;", "&#169;", "&#170;", "&#171;", "&#172;", "&#173;", "&#174;", "&#175;", "&#176;", "&#177;", "&#178;", "&#179;", "&#180;", "&#181;", "&#182;", "&#183;", "&#184;", "&#185;", "&#186;", "&#187;", "&#188;", "&#189;", "&#190;", "&#191;", "&#192;", "&#193;", "&#194;", "&#195;", "&#196;", "&#197;", "&#198;", "&#199;", "&#200;", "&#201;", "&#202;", "&#203;", "&#204;", "&#205;", "&#206;", "&#207;", "&#208;", "&#209;", "&#210;", "&#211;", "&#212;", "&#213;", "&#214;", "&#215;", "&#216;", "&#217;", "&#218;", "&#219;", "&#220;", "&#221;", "&#222;", "&#223;", "&#224;", "&#225;", "&#226;", "&#227;", "&#228;", "&#229;", "&#230;", "&#231;", "&#232;", "&#233;", "&#234;", "&#235;", "&#236;", "&#237;", "&#238;", "&#239;", "&#240;", "&#241;", "&#242;", "&#243;", "&#244;", "&#245;", "&#246;", "&#247;", "&#248;", "&#249;", "&#250;", "&#251;", "&#252;", "&#253;", "&#254;", "&#255;");

  $trans_tbl["HTML"] = array("&amp;", "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;", "&copy;", "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;", "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;", "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;", "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;", "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;", "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;", "&THORN;", "&szlig;", "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;", "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;", "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;", "&oslash;", "&ugrave;", "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;");

  if(!isset($trans_tbl[$cs_from]) || !isset($trans_tbl[$cs_to])) {
    trigger_error("recode_charset(): unknown charset '".$cs_from."' or '".$cs_to."'", E_USER_WARNING);
    return $string;
  }

  // strtr() with a map scans the input once; str_replace() with arrays
  // would apply each pair in turn and re-process earlier replacements.
  return strtr($string, array_combine($trans_tbl[$cs_from], $trans_tbl[$cs_to]));
}

?>
