AaronStone AaronStone - 5 months ago 21
PHP Question

Search by phrase in files (doc, docx, xlsx, pdf) in PHP

So far I have built a website that has a search feature. Users can upload files with different extensions (docx, doc, pdf, etc.) to the database, and I need to search those files by their content, but I'm not able to get that working properly. I have implemented two features:
1. search by name of file
2. search by phrase

Search by name works perfectly, but there is a problem with the search-by-phrase part. I'm able to convert these files into a text file, but I don't know why I'm not able to search in that file. Can anyone tell me where I'm going wrong, or suggest another solution?

Here is the code....

homepage.php

<form method="post" action="search1.php" class="container 50%" id="searchform">
<!-- Search form posted to search1.php: "name" carries the search terms and the
     "search" radio group selects the mode ("name" = match file names,
     "phrase" = match file contents). -->
<input type="text" name="name" placeholder="Enter the terms you wish to search for" />
<input type="submit" name="submit" value="Search" class="fit special" />
<!-- Each radio gets a label so the user can tell the two modes apart and
     can click the text to select the option. -->
<input type="radio" id="name" name="search" value="name" class="fit special" />
<label for="name">Search by name</label>
<input type="radio" id="phrase" name="search" value="phrase" class="fit special" />
<label for="phrase">Search by phrase</label>
</form>


search1.php

<?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>

<?php
// Connection settings for the MySQL server and the schema holding the upload tables.
// NOTE(review): credentials are hard-coded here; consider moving them to configuration.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Open the default connection; the script stops with a short message if the
// server cannot be reached.
if (!mysql_connect($dbhost, $dbuser, $dbpass)) {
    die('cannot connect to the server');
}

// Select the working database on that connection, again stopping on failure.
if (!mysql_select_db($dbname)) {
    die('database selection problem');
}
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>
<div class="table-wrapper">
<table class="alt">
<thead>
<tr>
<th>File Name</th>
<th>View</th>
</tr>
</thead>
<?php
// Search handler for the form posted from homepage.php.
// Reads the search terms from $_POST['name'] and the mode from $_POST['search'],
// then emits one table row per matching file, or a JavaScript alert followed by a
// redirect to homepage.php when nothing matches or the input is incomplete.
if(isset($_POST['submit'])){
$name=$_POST['name'];
// Loose comparison: an empty search box ('') compares equal to NULL, so it falls
// through to the "Please enter a search query" branch at the bottom.
if($name!=NULL)
{
// A mode radio button must have been selected.
if (!empty($_POST['search'])) {
if ($_POST['search']=="phrase") { //search by phrase
$searchthis = $name;
$matches = array();

// Collect the `file` column of all seven tables in one UNION query.
$query = "SELECT file from ada ";
$query .= "UNION ";
$query .= "SELECT file from cdr ";
$query .= "UNION ";
$query .= "SELECT file from others ";
$query .= "UNION ";
$query .= "SELECT file from pdr ";
$query .= "UNION ";
$query .= "SELECT file from rr ";
$query .= "UNION ";
$query .= "SELECT file from sdd ";
$query .= "UNION ";
$query .= "SELECT file from tbl_uploads ";

$result = mysql_query($query);
// NOTE(review): sample.txt is opened for writing once, before the loop, and is never
// closed, so the text of every converted file is written through this same handle.
$new_file = fopen("sample.txt","w") or die("Unable to open file!!");

// For every stored file name: convert the document to text and look for the phrase.
while($row=mysql_fetch_array($result))
{
// Builds a Windows-style path of the form <cwd>\uploads\<file>.
$filepath = getcwd() . "\uploads\\".$row['file'];
// Replaces any '//' in the path with a single backslash.
$path = str_replace('//', '\\', $filepath);
$Obj = new DocxConversion($path);
$Text= $Obj->convertToText();
fwrite($new_file,$Text);
// NOTE(review): $new_file is a stream resource, so this prints the resource
// identifier into the page rather than any file content.
echo $new_file."<br/>";
// NOTE(review): fopen() expects a file name but is given the resource $new_file;
// presumably the intent was to reopen "sample.txt" for reading. With no usable read
// handle the search loop below does not run.
$handle = fopen($new_file, "r");
if ($handle)
{
// Scan the text one line at a time and stop at the first line containing the phrase.
while (!feof($handle))
{
$buffer = fgets($handle);
// strpos() is case-sensitive, so the phrase must match the document's casing exactly.
if(strpos($buffer, $searchthis) !== FALSE)
{
$matches[] = $row['file'];
break;
}

}
fclose($handle);
}
}
// Drop empty/falsy entries from the list of matching file names.
$matches = array_filter($matches);

if (!empty($matches))
{
// NOTE(review): file names are echoed into the HTML below without escaping.
foreach($matches as $row)
{
?>
<tr>
<td><?php echo $row ?></td>
<td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td>
</tr>
<?php
}
}
else
{
// No file contained the phrase: alert and send the user back to the form.
//echo " Phrase not found!!!";
?>
<script>
alert('Phrase not Found');
window.location.href='homepage.php';
</script>
<?php
}
}
else{ //search by name
// Tables to search; only the values (table names) are used.
$array = array(
"db1" => "ada",
"db2" => "cdr",
"db3" => "others",
"db4" => "pdr",
"db5" => "rr",
"db6" => "sdd",
"db7" => "tbl_uploads",
);

// NOTE(review): repeats the connection parameters already used at the top of this script.
//connect to the database
$db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database because:'.mysql_error());

//-select the database to use
$mydb=mysql_select_db("dbtuts");
// Becomes true as soon as any table yields at least one matching row.
$no_of_access = false;
// Walk the table list with the array's internal pointer (advanced by next() below).
while ($db_name = current($array))
{

// NOTE(review): $name comes straight from $_POST and is interpolated into the SQL
// text unescaped, which allows SQL injection.
//-query the database table
$sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";

//-run the query against the mysql query function
$result=mysql_query($sql);
$num_rows = mysql_num_rows($result);
if($num_rows > 0)
{
//-create while loop and loop through result set
$no_of_access = true;
// NOTE(review): file names are echoed into the HTML below without escaping.
while($row=mysql_fetch_array($result))
{
?>
<tr>
<td><?php echo $row['file'] ?></td>
<td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td>
</tr>
<?php
}
}
else
{
// "Not found" is reported only once: when the current table is tbl_uploads (the
// last entry in the list) and no table has produced a match.
if(!$no_of_access && $db_name == "tbl_uploads")
//echo "<p> Result not found!!<p>";
{
?>
<script>
alert('Result Not Found!!');
window.location.href='homepage.php';
</script>
<?php
}
}
next($array);
}

}
}
else
{
// No search mode was selected.
//echo "<p>Please select an option</p>";
?>
<script>
alert('Please Select an option');
window.location.href='homepage.php';
</script>
<?php
}
}
else
{
// The search box was left empty.
//echo "<p>Please enter a search query</p>";
?>
<script>
alert('Please enter a search query');
window.location.href='homepage.php';
</script>
<?php
}
}
?>
</table>
</div>
</section>
</body>
</html>


The above code searches by file name perfectly, but there is a problem with the search-by-phrase part.

class.php

<?php require_once("/includes/pdf.php"); ?>
<?php
/**
 * Extracts plain text from an uploaded document so its content can be searched.
 *
 * Supported extensions (matched case-insensitively): doc, docx, xlsx, pptx, pdf.
 * PDF extraction is delegated to the PDF2Text class loaded from includes/pdf.php.
 */
class DocxConversion{
    /** @var string Path of the document to convert. */
    private $filename;

    /**
     * @param string $filePath Path of the document to convert.
     */
    public function __construct($filePath) {
        $this->filename = $filePath;
    }

    /************************doc file************************************/
    /**
     * Best-effort text extraction from a legacy binary .doc file.
     *
     * The file is split on carriage returns; chunks containing a NUL byte are
     * treated as binary and dropped, the rest are joined with a trailing space
     * and stripped of every character outside a small allow-list.
     *
     * @return string Extracted text, or "" when the file cannot be read.
     */
    private function read_doc() {
        if (!is_file($this->filename) || !is_readable($this->filename)) {
            return "";
        }
        // file_get_contents() opens, reads and closes in one call, so no handle is
        // leaked, and it copes with zero-length files.
        $raw = file_get_contents($this->filename);
        if ($raw === false || $raw === "") {
            return "";
        }

        $outtext = "";
        foreach (explode(chr(0x0D), $raw) as $thisline) {
            if ($thisline === "" || strpos($thisline, chr(0x00)) !== false) {
                continue; // empty or binary chunk
            }
            $outtext .= $thisline." ";
        }
        return preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
    }

    /************************docx file************************************/
    /**
     * Extracts the text of word/document.xml from a .docx archive.
     *
     * Table-cell boundaries become a space and paragraph ends become CRLF before
     * the remaining XML tags are stripped.
     *
     * @return string|false Extracted text ("" when the archive has no
     *                      word/document.xml), or false when the archive cannot
     *                      be opened.
     */
    private function read_docx(){
        // ZipArchive is used here for consistency with the xlsx/pptx readers.
        $zip = new ZipArchive;
        if ($zip->open($this->filename) !== true) {
            return false;
        }
        $content = $zip->getFromName("word/document.xml");
        $zip->close();
        if ($content === false) {
            $content = '';
        }

        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);

        return strip_tags($content);
    }

    /************************PDF file************************************/
    /**
     * Extracts text from a PDF via PDF2Text.
     *
     * The text is returned to the caller instead of being echoed into the page,
     * so PDF content can actually be searched.
     * NOTE(review): assumes PDF2Text::output() returns the decoded text when
     * called without arguments (the previous code echoed its result); confirm
     * against includes/pdf.php.
     *
     * @return string Decoded text as produced by PDF2Text.
     */
    private function read_pdf(){
        $a=new PDF2Text();
        $a->setFilename($this->filename);
        $a->decodePDF();
        return $a->output();
    }

    /**
     * Reads one XML entry of an open archive and returns it with all tags stripped.
     *
     * Entity substitution (LIBXML_NOENT) and XInclude processing are deliberately
     * not enabled: the archives are user uploads, and those options would let a
     * crafted file pull in local files or remote resources. LIBXML_NONET also
     * forbids network access while parsing.
     *
     * @param ZipArchive $zip   Archive that is already open.
     * @param int        $index Index of the entry to read.
     * @return string Tag-stripped text, or "" when the entry is missing or not
     *                well-formed XML.
     */
    private function xml_entry_to_text($zip, $index) {
        $xml = $zip->getFromIndex($index);
        if (!is_string($xml) || $xml === "") {
            return "";
        }
        $doc = new DOMDocument();
        if (!$doc->loadXML($xml, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
            return "";
        }
        return strip_tags($doc->saveXML());
    }

    /************************excel sheet************************************/
    /**
     * Extracts the shared-strings text of an .xlsx workbook.
     *
     * @param string $input_file Path of the .xlsx file.
     * @return string Text of xl/sharedStrings.xml with tags stripped, or "" when
     *                the archive cannot be opened or has no shared strings.
     */
    public function xlsx_to_text($input_file){
        $xml_filename = "xl/sharedStrings.xml"; //content file name
        $zip_handle = new ZipArchive;
        $output_text = "";
        if (true !== $zip_handle->open($input_file)) {
            return $output_text;
        }
        if (($xml_index = $zip_handle->locateName($xml_filename)) !== false) {
            $output_text = $this->xml_entry_to_text($zip_handle, $xml_index);
        }
        $zip_handle->close();
        return $output_text;
    }

    /*************************power point files*****************************/
    /**
     * Extracts the text of every slide of a .pptx presentation.
     *
     * Slides are read as ppt/slides/slide1.xml, slide2.xml, ... until the first
     * missing number.
     *
     * @param string $input_file Path of the .pptx file.
     * @return string Concatenated tag-stripped slide text, or "" when the archive
     *                cannot be opened or contains no slides.
     */
    public function pptx_to_text($input_file){
        $zip_handle = new ZipArchive;
        $output_text = "";
        if (true !== $zip_handle->open($input_file)) {
            return $output_text;
        }
        $slide_number = 1; //loop through slide files
        while (($xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml")) !== false) {
            $output_text .= $this->xml_entry_to_text($zip_handle, $xml_index);
            $slide_number++;
        }
        $zip_handle->close();
        return $output_text;
    }

    /**
     * Converts the document given to the constructor into plain text.
     *
     * @return string|false Extracted text; the literal "File Not exists" when the
     *                      path does not exist; the literal "Invalid File Type"
     *                      for unsupported (or missing) extensions; false when a
     *                      .docx archive cannot be opened.
     */
    public function convertToText() {

        if ($this->filename === null) {
            return "Invalid File Type";
        }
        if (!file_exists($this->filename)) {
            return "File Not exists";
        }

        // PATHINFO_EXTENSION yields "" rather than an undefined index when the
        // name has no extension; lower-casing lets "REPORT.DOCX" be recognised.
        $file_ext = strtolower(pathinfo($this->filename, PATHINFO_EXTENSION));

        switch ($file_ext) {
            case "doc":
                return $this->read_doc();
            case "docx":
                return $this->read_docx();
            case "xlsx":
                return $this->xlsx_to_text($this->filename);
            case "pptx":
                return $this->pptx_to_text($this->filename);
            case "pdf":
                return $this->read_pdf();
            default:
                return "Invalid File Type";
        }
    }

}

?>


The above class (class.php) converts doc, docx, xlsx, pptx and pdf files to text.

pdf.php
http://pastebin.com/dvwySU1a
This class converts a PDF file to text.

Answer

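I finally found the solution myself. The problem was that the original code passed the file handle `$new_file` (a resource) to `fopen()` instead of a file name, so the converted text was never read back and searched; the comparison was also case-sensitive. The corrected search1.php is below.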

search1.php

<?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>

<?php
// Connection settings for the MySQL server and the schema holding the upload tables.
// NOTE(review): credentials are hard-coded here; consider moving them to configuration.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Open the default connection; the script stops with a short message if the
// server cannot be reached.
if (!mysql_connect($dbhost, $dbuser, $dbpass)) {
    die('cannot connect to the server');
}

// Select the working database on that connection, again stopping on failure.
if (!mysql_select_db($dbname)) {
    die('database selection problem');
}
?>

<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
      <table class="alt">
        <thead>
            <tr>
                <th>File Name</th>
                <th>View</th>
            </tr>
        </thead>    
<?php
/*
 * Search handler for the form posted from homepage.php.
 *
 * Input:  $_POST['name']   - the search terms
 *         $_POST['search'] - "phrase" to search file contents, anything else
 *                            to search file names
 * Output: one table row per matching file, or a JavaScript alert followed by
 *         a redirect to homepage.php when the input is incomplete or nothing
 *         matches.
 *
 * Relies on the MySQL connection opened at the top of this script and on the
 * DocxConversion class from includes/class.php.
 */

// Tables whose `file` column holds the names of uploaded files.
$tables = array('ada', 'cdr', 'others', 'pdr', 'rr', 'sdd', 'tbl_uploads');

$matches = array(); // file names to list in the result table
$alert = null;      // message to show before redirecting back, if any

if (isset($_POST['submit'])) {
    // Accept only plain strings so crafted array input cannot reach the string functions.
    $name = (isset($_POST['name']) && is_string($_POST['name'])) ? $_POST['name'] : '';
    $mode = (isset($_POST['search']) && is_string($_POST['search'])) ? $_POST['search'] : '';

    if ($name === '') {
        $alert = 'Please enter a search query';
    } elseif (empty($mode)) {
        $alert = 'Please Select an option';
    } else {
        if ($mode === 'phrase') {
            // Case-insensitive content search: both sides are lower-cased.
            $needle = strtolower($name);

            foreach ($tables as $table) {
                $result = mysql_query("SELECT file FROM $table");
                if (!$result) {
                    continue; // query failed for this table; keep searching the others
                }
                while ($row = mysql_fetch_array($result)) {
                    $path = getcwd() . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR . $row['file'];
                    $converter = new DocxConversion($path);
                    $text = $converter->convertToText();

                    // Skip conversion failures and the converter's status strings so
                    // that a search for e.g. "file" does not match every missing upload.
                    if (!is_string($text) || $text === 'File Not exists' || $text === 'Invalid File Type') {
                        continue;
                    }

                    // The text is searched in memory; writing it to a shared
                    // sample.txt is unnecessary and would let concurrent requests
                    // overwrite each other's data.
                    if (strpos(strtolower($text), $needle) !== false) {
                        $matches[] = $row['file'];
                    }
                }
            }
        } else {
            // File-name search. LIKE wildcards typed by the user are matched
            // literally, and the value is escaped before being placed in the SQL text.
            $like = mysql_real_escape_string(addcslashes($name, '\\%_'));

            foreach ($tables as $table) {
                $result = mysql_query("SELECT file FROM $table WHERE (file LIKE '%$like%')");
                if (!$result) {
                    continue;
                }
                while ($row = mysql_fetch_array($result)) {
                    $matches[] = $row['file'];
                }
            }
        }

        // Drop empty names and list each file once, even if several tables reference it.
        $unique = array();
        foreach ($matches as $file) {
            if ($file !== null && $file !== '' && !in_array($file, $unique, true)) {
                $unique[] = $file;
            }
        }
        $matches = $unique;

        if (empty($matches)) {
            $alert = ($mode === 'phrase') ? 'Phrase not Found' : 'Result Not Found!!';
        }
    }
}

foreach ($matches as $file) {
    // File names originate from uploads, so escape them for HTML text and attributes.
    $safe = htmlspecialchars($file, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    ?>
                <tr>
                <td><?php echo $safe ?></td>
                <td><a href="uploads/<?php echo $safe ?>" target="_blank">view file</a></td>
                </tr>
    <?php
}

if ($alert !== null) {
    ?>
            <script>
                alert(<?php echo json_encode($alert) ?>);
                window.location.href='homepage.php';
            </script>
    <?php
}
?>
</table>
</div>
</section>  
</body> 
</html>
Comments