DEV Community

Teddy Zugana
Teddy Zugana

Posted on • Edited on

5 1

PHP Example: Extracting Text Content from a Word .doc or .docx File

/**
 * Extracts plain text from a Microsoft Word ".doc" or ".docx" file.
 *
 * The converter is chosen from the file extension (case-insensitive):
 *  - ".doc"  is handled by a byte-level heuristic (see read_doc()).
 *  - ".docx" is handled by reading "word/document.xml" from the ZIP container
 *            (see read_docx()); this requires PHP's zip extension.
 */
class Doc2Txt {
    /** @var string Path of the document to convert. */
    private $filename;

    /**
     * @param string $filePath Path to the ".doc" or ".docx" file.
     */
    public function __construct($filePath) {
        $this->filename = $filePath;
    }

    /**
     * Heuristic text extraction for the binary ".doc" format.
     *
     * The raw bytes are split on carriage returns; every segment that is empty
     * or contains a NUL byte is discarded, the rest are joined with a trailing
     * space, and finally every character outside a small whitelist is removed.
     *
     * @return string|false Extracted text, or false when the file cannot be read.
     */
    private function read_doc() {
        if (!is_readable($this->filename)) {
            return false;
        }

        // file_get_contents() is binary-safe, copes with zero-byte files and
        // releases the file handle itself, unlike a bare fopen()/fread() pair.
        $raw = file_get_contents($this->filename);
        if ($raw === false) {
            return false;
        }

        $outtext = "";
        foreach (explode("\r", $raw) as $segment) {
            // Segments with NUL bytes are treated as binary structure, not text.
            if ($segment === "" || strpos($segment, "\0") !== false) {
                continue;
            }
            $outtext .= $segment . " ";
        }

        // Keep only letters, digits, whitespace and a few punctuation marks.
        return preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/", "", $outtext);
    }

    /**
     * Text extraction for the ZIP-based ".docx" format.
     *
     * Reads "word/document.xml", turns table-cell boundaries into spaces and
     * paragraph ends into CRLF line breaks, then strips the remaining tags.
     *
     * @return string|false Extracted text ("" when the archive has no
     *                      "word/document.xml"), or false when the archive
     *                      cannot be opened.
     */
    private function read_docx() {
        // ZipArchive replaces the procedural zip_* API (deprecated in PHP 8.0).
        $zip = new ZipArchive();
        if ($zip->open($this->filename) !== true) {
            return false;
        }

        $content = $zip->getFromName('word/document.xml');
        $zip->close();

        if ($content === false) {
            $content = '';
        }

        // Order matters: the cell-boundary pattern contains the paragraph-end
        // pattern, so it has to be replaced first.
        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);

        return strip_tags($content);
    }

    /**
     * Converts the configured file to plain text.
     *
     * @return string|false The extracted text; the literal string
     *                      "File Not exists" when the path is empty or is not an
     *                      existing regular file; the literal string
     *                      "Invalid File Type" when the extension is neither
     *                      "doc" nor "docx"; or false when the file cannot be
     *                      read or opened.
     */
    public function convertToText() {
        if ($this->filename === null || $this->filename === '' || !is_file($this->filename)) {
            return "File Not exists";
        }

        // PATHINFO_EXTENSION yields "" (not an undefined key) when the name has
        // no extension; lower-casing accepts ".DOC" / ".DOCX" as well.
        $file_ext = strtolower(pathinfo($this->filename, PATHINFO_EXTENSION));

        if ($file_ext === "doc") {
            return $this->read_doc();
        }
        if ($file_ext === "docx") {
            return $this->read_docx();
        }

        return "Invalid File Type";
    }
}
Enter fullscreen mode Exit fullscreen mode

Example of calling the class:

  // $inputfile is not defined in this snippet; set it beforehand to the path
  // of the .doc or .docx file to convert.
  $docObj = new Doc2Txt($inputfile);

  // $txt receives the extracted text, or one of the status strings
  // "File Not exists" / "Invalid File Type"; a .docx that cannot be opened
  // as a ZIP archive yields false.
  $txt = $docObj->convertToText();
Enter fullscreen mode Exit fullscreen mode

Image of Docusign

🛠️ Bring your solution into Docusign. Reach over 1.6M customers.

Docusign is now extensible. Overcome challenges with disconnected products and inaccessible data by bringing your solutions into Docusign and publishing to 1.6M customers in the App Center.

Learn more

Top comments (0)

A Workflow Copilot. Tailored to You.

Pieces.app image

Our desktop app, with its intelligent copilot, streamlines coding by generating snippets, extracting code from screenshots, and accelerating problem-solving.

Read the docs