search.php

00001 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
00002 <html>
00003 <head>
00004 <title></title>
00005 <meta name="generator" content="Quanta">
00006 <meta name="author" content="Jason Lucas">
00007 <meta name="date" content="2007-04-25T21:08:43+0100">
00008 <meta name="copyright" content="KTechLab.org">
00009 <meta name="keywords" content="ktechlab,electronics,PIC,microcontroller,IDE,design,simulation,emulation,circuit,linux,KDE,educational,hobby,flowcode,microbe">
00010 <meta name="description" content="KTechLab is an IDE for microcontrollers and electronics">
00011 <meta http-equiv="content-type" content="text/html; charset=UTF-8">
00012 <meta http-equiv="content-style-type" content="text/css">
00013 <meta http-equiv="expires" content="0">
00014 <LINK href="ktechlab.css" rel="stylesheet" type="text/css">
00015 </head>
00016 <BODY BGCOLOR="#FFFFFF">
00017 
00018 <!-- Header Table -->
00019 <TABLE WIDTH="760" BORDER="0" CELLSPACING="0" CELLPADDING="2" align="center" valign="top">
00020 <TR>
00021         <TD WIDTH="16">
00022         &nbsp;  
00023 
00024         </TD>
00025         <TD WIDTH="728" align="center">
00026                 <a href="../index.htm"><img src="../images/ktlbanner.png" width="728" height="90" border="0" alt="KTechlab.org"></a>
00027         </TD>
00028         <TD WIDTH="16">
00029                 &nbsp;
00030         </TD>
00031 </TR>
00032 </TABLE>
00033 <!-- Body Table -->
00034 <TABLE width="760" cellspacing="0" border="0" cellpadding="0" align="center">
00035 <TR>
00036         <TD WIDTH="760" valign="top" align="left" class="body"> 
00037                 
00038 <!-- Generated by Doxygen 1.5.1 -->
00039 <div class="tabs">
00040   <ul>
00041     <li><a href="index.html"><span>Main&nbsp;Page</span></a></li>
00042     <li><a href="namespaces.html"><span>Namespaces</span></a></li>
00043     <li><a href="classes.html"><span>Classes</span></a></li>
00044     <li><a href="files.html"><span>Files</span></a></li>
00045     <li><a href="dirs.html"><span>Directories</span></a></li>
00046     <li><a href="pages.html"><span>Related&nbsp;Pages</span></a></li>
00047     <li>
00048       <form action="search.php" method="get">
00049         <table cellspacing="0" cellpadding="0" border="0">
00050           <tr>
00051             <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
00052 
00053 <?php
00054 
/**
 * Heading text shown above the list of search hits.
 *
 * @return string the heading caption
 */
function search_results()
{
  $heading = 'Search Results';
  return $heading;
}
00059 
/**
 * Builds the summary sentence telling the user how many documents matched.
 *
 * @param mixed $num number of matching documents (compared loosely with 0 and 1)
 * @return string HTML fragment with the summary sentence
 */
function matches_text($num)
{
  // Nothing matched at all.
  if ($num==0) return "Sorry, no documents matching your query.";

  // Exactly one hit gets the singular wording.
  if ($num==1) return "Found <b>1</b> document matching your query.";

  // Every other count uses the plural wording.
  return "Found <b>{$num}</b> documents matching your query. Showing best matches first.";
}
00075 
/**
 * Label that precedes the list of matched words of a single search hit.
 *
 * @return string the label, including its trailing space
 */
function report_matches()
{
  $label = 'Matches: ';
  return $label;
}
/**
 * Emits the search text field and closes the form, table, list and tab bar
 * that the static HTML above the PHP block left open.
 *
 * The value is taken from the request by the caller, so it is HTML-escaped
 * before being placed inside the double-quoted value attribute; otherwise a
 * query containing a quote could break out of the attribute and inject markup.
 *
 * @param mixed $value text to pre-fill the search field with; non-scalar
 *                     values are treated as an empty string
 */
function end_form($value)
{
  // Arrays/objects cannot be rendered meaningfully in a text field.
  $text = is_scalar($value) ? (string)$value : "";
  $escaped = htmlspecialchars($text, ENT_QUOTES);
  echo "            <td><input type=\"text\" name=\"query\" value=\"$escaped\" size=\"20\" accesskey=\"s\"/></td>\n          </tr>\n        </table>\n      </form>\n    </li>\n  </ul>\n</div>\n";
}
00084 
/**
 * Reads four bytes from the current file position and combines them into an
 * integer, most significant byte first.
 *
 * @param resource $file open file handle positioned at the integer
 * @return int the combined value
 */
function readInt($file)
{
  $value = 0;
  for ($n=0; $n<4; $n++)
  {
    // Shift what we have so far up one byte and append the next byte.
    $value = ($value<<8) | ord(fgetc($file));
  }
  return $value;
}
00091 
/**
 * Reads characters from the current file position up to (and consuming) the
 * next character whose ord() is zero, and returns them without that terminator.
 *
 * @param resource $file open file handle positioned at the string
 * @return string the characters read before the terminator
 */
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    // A zero byte terminates the string; fgetc() failing also yields ord() 0.
    if (!ord($ch)) break;
    $text .= $ch;
  }
  return $text;
}
00098 
/**
 * Reads the next four characters of the file and returns them concatenated.
 * The caller compares the result against the expected index signature.
 *
 * @param resource $file open file handle
 * @return string the four characters read
 */
function readHeader($file)
{
  $magic = fgetc($file);
  for ($n=1; $n<4; $n++)
  {
    $magic .= fgetc($file);
  }
  return $magic;
}
00105 
/**
 * Computes the hash-table slot for a search word.
 *
 * The slot is derived from the first two bytes of the word only, so every
 * word sharing a two-character prefix lands in the same slot; this is what
 * allows prefix (substring-from-start) searching.
 *
 * Square-bracket string offsets are used because the curly-brace form
 * ($word{0}) is a parse error on PHP 8.
 *
 * @param string $word the word to hash
 * @return int first byte * 256 + second byte, or -1 when the word is shorter
 *             than two bytes or one of the first two bytes is zero
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
00129 
/**
 * Looks up one word in the index file and appends an entry to $statsList for
 * every indexed word that starts with it.
 *
 * Each appended entry has the keys "word" (the search word), "match" (the
 * indexed word), "index" (file offset of its statistics), "full" (true when
 * the lengths of both words are equal) and "docs" (list of documents, each
 * with "idx", "freq", "rank", "hi", "name" and "url").
 *
 * @param resource $file       open index file
 * @param string   $word       word to look up
 * @param array    $statsList  result list; new entries are appended in place
 * @return array the updated $statsList
 */
function search($file,$word,&$statsList)
{
  $slot = computeIndex($word);
  if ($slot==-1)
  {
    // the word cannot be hashed, nothing to look up
    return $statsList;
  }

  // 4 bytes per table entry, plus 4 bytes to skip the header
  fseek($file,$slot*4+4);
  $listOffset = readInt($file);
  if (!$listOffset)
  {
    // no words are stored under this hash key
    return $statsList;
  }

  // Step 1: walk the word list of this slot and keep the prefix matches.
  $first = sizeof($statsList);
  $next = $first;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  for ($candidate=readString($file); $candidate; $candidate=readString($file))
  {
    // every word in the list is followed by the offset of its statistics
    $statsOffset = readInt($file);
    if ($word==substr($candidate,0,$wordLen))
    {
      $statsList[$next] = array(
          "word"=>$word,
          "match"=>$candidate,
          "index"=>$statsOffset,
          "full"=>strlen($candidate)==$wordLen,
          "docs"=>array()
          );
      $next++;
    }
  }
  $last = sizeof($statsList);

  // Step 2: load the document list of every match and accumulate totals.
  $hiDocs = 0;
  $hiFreqSum = 0;
  $loFreqSum = 0;
  for ($s=$first; $s<$last; $s++)
  {
    // whole word matches have a double weight
    $weight = $statsList[$s]["full"] ? 2 : 1;
    fseek($file,$statsList[$s]["index"]);
    $numDocs = readInt($file);
    $docList = array();
    // document offset + occurrence frequency; bit 0 of the stored frequency
    // flags a high priority document
    for ($d=0; $d<$numDocs; $d++)
    {
      $docOffset = readInt($file);
      $rawFreq = readInt($file);
      $isHi = $rawFreq&1;
      $docList[$d] = array("idx"  => $docOffset,
                           "freq" => $rawFreq>>1,
                           "rank" => 0.0,
                           "hi"   => $isHi
                          );
      if ($isHi)
      {
        $hiDocs++;
        $hiFreqSum += $rawFreq*$weight;
      }
      else
      {
        $loFreqSum += $rawFreq*$weight;
      }
    }
    // name and url of each document are stored at the document offset
    for ($d=0; $d<$numDocs; $d++)
    {
      fseek($file,$docList[$d]["idx"]);
      $docList[$d]["name"] = readString($file);
      $docList[$d]["url"] = readString($file);
    }
    $statsList[$s]["docs"] = $docList;
  }

  // Step 3: turn the frequencies into ranks relative to the totals.
  $totalFreq = ($hiDocs+1)*$loFreqSum + $hiFreqSum;
  for ($s=$first; $s<$last; $s++)
  {
    // whole word matches have a double weight
    $weight = $statsList[$s]["full"] ? 2 : 1;
    $numDocs = sizeof($statsList[$s]["docs"]);
    for ($d=0; $d<$numDocs; $d++)
    {
      $score = $statsList[$s]["docs"][$d]["freq"]*$weight;
      if ($statsList[$s]["docs"][$d]["hi"])
      {
        // high priority documents are lifted above all low priority ones
        $score += $loFreqSum;
      }
      $statsList[$s]["docs"][$d]["rank"] = (float)$score/$totalFreq;
    }
  }
  return $statsList;
}
00227 
/**
 * Merges the per-word search results into one entry per document.
 *
 * Documents are keyed by their url. The rank of a document is the sum of the
 * ranks it received for every matching word, and each entry collects a
 * "words" list describing which words matched and how often.
 *
 * The membership test uses array_key_exists(): the previous
 * in_array(..., array_keys(...)) scanned all keys on every lookup and
 * compared loosely, so numeric-looking urls could be mistaken for each other.
 *
 * @param array $results list of word entries as produced by search()
 * @param array $docs    map url => document entry; updated in place
 * @return array the updated $docs
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      if (array_key_exists($key,$docs))
      {
        // document already known from another word: accumulate the rank
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
            "name"=>$di["name"],
            "rank"=>$rank
            );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
00257 
/**
 * Returns only those documents that contain every required word and none of
 * the forbidden words.
 *
 * Iteration uses foreach because each() no longer exists in PHP 8. Words are
 * compared strictly so that numeric-looking words ("10" vs "1e1") are not
 * treated as equal.
 *
 * @param array $docs            map url => document entry with a "words" list
 * @param array $requiredWords   words that must all be among a document's words
 * @param array $forbiddenWords  words that must not be among a document's words
 * @return array the subset of $docs that passed, with keys preserved
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  foreach ($docs as $key => $doc)
  {
    // search words that produced a hit in this document
    $present=array();
    foreach ($doc["words"] as $wordInfo)
    {
      $present[]=$wordInfo["word"];
    }

    $copy=1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      if (!in_array($reqWord,$present,true))
      {
        $copy=0; // document lacks at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($present as $word)
      {
        if (in_array($word,$forbiddenWords,true))
        {
          $copy=0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
00297 
/**
 * Comparison callback that orders documents by descending rank.
 *
 * @param array $a document entry with a "rank" key
 * @param array $b document entry with a "rank" key
 * @return int 0 when both ranks are equal, -1 when $a ranks higher, 1 otherwise
 */
function compare_rank($a,$b)
{
  $left = $a["rank"];
  $right = $b["rank"];
  if ($left != $right)
  {
    // the higher rank sorts first
    return ($left > $right) ? -1 : 1;
  }
  return 0;
}
00306 
/**
 * Stores a copy of $docs in $sorted and orders it with compare_rank().
 * usort() re-indexes the array, so the original keys are not kept in the
 * sorted list.
 *
 * @param array $docs    documents to sort (left untouched)
 * @param array $sorted  receives the sorted list
 * @return array the sorted list
 */
function sort_results($docs,&$sorted)
{
  $sorted = $docs;
  usort($sorted, 'compare_rank');
  return $sorted;
}
00313 
/**
 * Prints the result table: a heading, a summary line and, for every document,
 * one row with its number and link followed by one row listing the matched
 * words with their frequencies.
 *
 * Compared with the earlier version, the link row is now explicitly closed
 * before the word row is opened, and the word fragments (which originate from
 * the user's query) are HTML-escaped before being printed.
 *
 * @param array $docs list of document entries with "url", "name" and "words"
 */
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      // row 1: running number and link to the document
      // NOTE(review): url and name are printed exactly as stored in the index;
      // whether the index already holds HTML-safe text cannot be told from
      // here - confirm before adding escaping to them.
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo "  </tr>\n";
      // row 2: the words that matched, search word in bold followed by the
      // remainder of the indexed word and the frequency
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // substr() may return false on old PHP when nothing remains
        $matchRight = (string)substr($wordInfo["match"],strlen($word));
        echo "<b>".htmlspecialchars($word, ENT_QUOTES)."</b>"
            .htmlspecialchars($matchRight, ENT_QUOTES)
            ."(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
00355 
/**
 * Entry point of the search page: opens and validates the index file, echoes
 * the search field, runs the query and prints the ranked results.
 *
 * Query syntax: words are separated by spaces; a leading '+' marks a word as
 * required and a leading '-' marks it as forbidden.
 *
 * Fixes compared with the earlier version:
 *  - string offsets use [] (the {} form is a parse error on PHP 8);
 *  - the token loop tests against false, so a token "0" no longer ends it;
 *  - words are lowercased before they are recorded as required/forbidden, so
 *    they compare equal to the lowercased words stored in the results;
 *  - empty words (a lone "+" or "-") are ignored;
 *  - the version check uses version_compare() instead of a string compare;
 *  - a non-string "query" parameter is treated as an empty query.
 */
function main()
{
  if (version_compare(phpversion(), '4.1.0', '<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  if (isset($_GET["query"]) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  end_form($query);
  echo "&nbsp;\n<div class=\"searchresults\">\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  while ($word!==false) // for each word in the search query
  {
    // the index is searched in lower case, so normalise once up front
    $word=strtolower($word);
    if ($word!=="" && $word[0]=='+')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $requiredWords[]=$word;
    }
    if ($word!=="" && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $forbiddenWords[]=$word;
    }
    // search every distinct word only once
    if ($word!=="" && !in_array($word,$foundWords))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n";
  fclose($file);
}
00406 
00407 main();
00408 
00409 
00410 ?>
00411 </td>
00412 </table>
00413 </body>
00414 </html>

Generated on Tue May 8 17:05:32 2007 for KTechLab by  doxygen 1.5.1