Vengat Vengat - 3 months ago 12
Vb.net Question

How to convert all pages of a doc file to separate images

The following is the code I am using to convert a doc file to an image. This works well for a file that contains only one page, but if there is more than one page in the doc file then it converts only the first page of the file to an image. Can someone suggest how to convert all pages of a doc file to separate images?

''' <summary>
''' Click handler: opens a fixed Word document, copies the whole document to the
''' clipboard as a picture, reads it back as an enhanced metafile and saves it
''' as a single JPEG file.
''' </summary>
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
Dim objWord As New Microsoft.Office.Interop.Word.Application
Dim objDoc As Microsoft.Office.Interop.Word.Document
'Clipboard format id passed to the ClipboardAPI calls below to request
'an enhanced metafile.
Const CF_ENHMETAFILE As Integer = 14
objDoc = objWord.Documents.Open("F:\Study\Constructor.docx")
'Select the entire active document (not an individual page) and copy
'that selection to the clipboard as a picture.
objWord.Activedocument.Select()
objWord.Selection.CopyAsPicture()
Dim ip As IntPtr
Dim metaFile As System.Drawing.Imaging.Metafile
Dim bRet As Boolean
'bRet tracks whether each clipboard step so far has succeeded.
bRet = ClipboardAPI.OpenClipboard(Me.Handle)
If bRet = True Then
'Verify the clipboard contains data available
'as an enhanced metafile.
bRet = ClipboardAPI.IsClipboardFormatAvailable(CF_ENHMETAFILE) <> 0
End If

If bRet = True Then
'Store the clipboard's contents in the IntPtr.
ip = ClipboardAPI.GetClipboardData(CF_ENHMETAFILE)
End If

'Verify the IntPtr contains data before proceeding. Passing
'an empty IntPtr to System.Drawing.Imaging.Metafile results
'in an exception.
'NOTE: CloseClipboard is only called inside this branch, so the
'clipboard is not closed here when ip is zero.
If Not IntPtr.Zero.Equals(ip) Then
metaFile = New System.Drawing.Imaging.Metafile(ip, True)
ClipboardAPI.CloseClipboard()
Dim image As System.Drawing.Image = metaFile
'Me.PictureBox1.Image = metaFile

'Target bitmap with the same pixel dimensions as the metafile.
Dim objImageWriter As Image = New Bitmap(image.Width, image.Height)

Dim objGraphics As Graphics = Graphics.FromImage(objImageWriter)

'Fill the bitmap with white first, then draw the metafile on top of it.
objGraphics.Clear(Color.White)
'objGraphics.SmoothingMode = Drawing2D.SmoothingMode.AntiAlias
objGraphics.DrawImage(image, 0, 0, image.Width, image.Height)


image.Dispose()
objGraphics.Dispose()

'Encoder parameter requesting quality 100 for the save below.
Dim ep As Imaging.EncoderParameters = New Imaging.EncoderParameters
ep.Param(0) = New System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 100)

Dim codecs() As Imaging.ImageCodecInfo = Imaging.ImageCodecInfo.GetImageEncoders()
Dim iciInfo As Imaging.ImageCodecInfo
Dim item As Imaging.ImageCodecInfo

'Pick the installed encoder whose MIME type is image/jpeg; iciInfo stays
'unassigned if none matches.
For Each item In codecs
If (item.MimeType = "image/jpeg") Then iciInfo = item
Next

'Write the rendered bitmap to a fixed output path as a JPEG.
objImageWriter.Save("F:\Study\test1.jpg", iciInfo, ep)
objImageWriter.Dispose()


End If


''' <summary>
''' Platform-invoke declarations for the clipboard functions exported by
''' user32.dll. Every member is a thin, shared wrapper with an empty body;
''' the runtime binds it to the native entry point named in its attribute.
''' </summary>
Public Class ClipboardAPI
    ' Library that exports all of the functions declared below.
    Private Const User32 As String = "user32.dll"

    ''' <summary>Binds to the native OpenClipboard entry point.</summary>
    ''' <param name="hWnd">Window handle passed through to the native call.</param>
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="OpenClipboard", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function OpenClipboard(ByVal hWnd As IntPtr) As Boolean
    End Function

    ''' <summary>Binds to the native CloseClipboard entry point.</summary>
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="CloseClipboard", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function CloseClipboard() As Boolean
    End Function

    ''' <summary>Binds to the native EmptyClipboard entry point.</summary>
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="EmptyClipboard", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function EmptyClipboard() As Boolean
    End Function

    ''' <summary>Binds to the native IsClipboardFormatAvailable entry point.</summary>
    ''' <param name="uFormat">Clipboard format identifier to test for.</param>
    ''' <returns>The native result as a Short; callers compare it against 0.</returns>
    ' NOTE(review): the Win32 prototype returns BOOL (32-bit) while this
    ' declaration uses Short. Left unchanged because the return type is part
    ' of the public signature - confirm before widening it.
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="IsClipboardFormatAvailable", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function IsClipboardFormatAvailable(ByVal uFormat As Integer) As Short
    End Function

    ''' <summary>Binds to the native GetClipboardData entry point.</summary>
    ''' <param name="uFormat">Clipboard format identifier to retrieve.</param>
    ''' <returns>The handle returned by the native call.</returns>
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="GetClipboardData", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function GetClipboardData(ByVal uFormat As Integer) As IntPtr
    End Function

    ''' <summary>Binds to the native SetClipboardData entry point.</summary>
    ''' <param name="uFormat">Clipboard format identifier of the data.</param>
    ''' <param name="ByValhWnd">Handle passed as the second native argument.</param>
    ''' <returns>The handle returned by the native call.</returns>
    <Runtime.InteropServices.DllImport(User32, _
        EntryPoint:="SetClipboardData", _
        SetLastError:=True, _
        ExactSpelling:=True, _
        CallingConvention:=System.Runtime.InteropServices.CallingConvention.StdCall)> _
    Public Shared Function SetClipboardData(ByVal uFormat As Integer, ByVal ByValhWnd As IntPtr) As IntPtr
    End Function
End Class

Answer

The problem is that the line "objWord.Activedocument.Select()" references the entire document rather than the document's individual pages. I've added a bit to your code to snap an image of each page's contents:

''' <summary>
''' Click handler: opens the Word document, walks it page by page, copies each
''' page to the clipboard as an enhanced metafile and saves every page as its
''' own JPEG (test1.jpg, test2.jpg, ...).
''' </summary>
''' <remarks>
''' The clipboard is always closed again once it has been opened, and the Word
''' document and application are always shut down, even if a page fails.
''' A page for which no metafile can be read from the clipboard is skipped.
''' </remarks>
''' <exception cref="InvalidOperationException">
''' Thrown when no JPEG encoder is installed on the machine.
''' </exception>
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
    Const CF_ENHMETAFILE As Integer = 14

    ' The JPEG encoder does not change between pages, so look it up once
    ' before Word is even started.
    Dim iciInfo As Imaging.ImageCodecInfo = Nothing
    For Each item As Imaging.ImageCodecInfo In Imaging.ImageCodecInfo.GetImageEncoders()
        If item.MimeType = "image/jpeg" Then
            iciInfo = item
            Exit For
        End If
    Next
    If iciInfo Is Nothing Then
        Throw New InvalidOperationException("No JPEG image encoder is available.")
    End If

    Dim objWord As New Microsoft.Office.Interop.Word.Application
    Dim objDoc As Microsoft.Office.Interop.Word.Document = Nothing

    Try
        objDoc = objWord.Documents.Open("F:\Study\Constructor.docx")

        ' Force pagination so the page count below is accurate.
        objDoc.Repaginate()

        ' Encoder.Quality expects a Long value; 100 requests the best quality.
        Using ep As New Imaging.EncoderParameters
            ep.Param(0) = New System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 100L)

            For i As Integer = 1 To objDoc.ActiveWindow.Panes(1).Pages.Count
                ' Select the contents of the current page: the first page on
                ' the first pass, then the next page on each later pass.
                If i = 1 Then
                    With objWord.ActiveDocument
                        .GoTo(WdGoToItem.wdGoToPage, WdGoToDirection.wdGoToAbsolute, 1)
                        .Bookmarks("\Page").Range.Select()
                    End With
                Else
                    With objWord.Selection
                        .GoTo(What:=WdGoToItem.wdGoToPage, Which:=WdGoToDirection.wdGoToNext)
                        .Bookmarks("\Page").Range.Select()
                    End With
                End If

                objWord.Selection.CopyAsPicture()

                ' Declared with an initializer on purpose: a VB local declared
                ' inside a loop WITHOUT one keeps its value from the previous
                ' iteration, which would let a stale handle be reused.
                Dim metaFile As System.Drawing.Imaging.Metafile = Nothing

                If ClipboardAPI.OpenClipboard(Me.Handle) Then
                    Try
                        ' Verify the clipboard contains data available
                        ' as an enhanced metafile.
                        If ClipboardAPI.IsClipboardFormatAvailable(CF_ENHMETAFILE) <> 0 Then
                            Dim ip As IntPtr = ClipboardAPI.GetClipboardData(CF_ENHMETAFILE)

                            ' Passing an empty IntPtr to Metafile results
                            ' in an exception, so check it first.
                            If Not IntPtr.Zero.Equals(ip) Then
                                metaFile = New System.Drawing.Imaging.Metafile(ip, True)
                            End If
                        End If
                    Finally
                        ' Close the clipboard on every path once it was opened;
                        ' otherwise the next CopyAsPicture cannot use it.
                        ClipboardAPI.CloseClipboard()
                    End Try
                End If

                ' Nothing usable on the clipboard for this page: skip it.
                If metaFile Is Nothing Then
                    Continue For
                End If

                Using image As System.Drawing.Image = metaFile
                    Using objImageWriter As Image = New Bitmap(image.Width, image.Height)
                        Using objGraphics As Graphics = Graphics.FromImage(objImageWriter)
                            ' White background first, then the page on top.
                            objGraphics.Clear(Color.White)
                            objGraphics.DrawImage(image, 0, 0, image.Width, image.Height)
                        End Using

                        objImageWriter.Save("F:\Study\test" & i.ToString & ".jpg", iciInfo, ep)
                    End Using
                End Using
            Next
        End Using
    Finally
        ' Always shut Word down; otherwise a hidden WINWORD process is left
        ' running after the handler returns.
        Try
            If objDoc IsNot Nothing Then
                objDoc.Close(False)
            End If
        Finally
            objWord.Quit()
        End Try
    End Try
End Sub

Summary of the additional code changes:

I added "objDoc.Repaginate()" to get accurate page references. Word does not ordinarily work in terms of pages; it constantly queries the system's print driver to decide where it needs to break text up into pages. Repaginating ensures that we have an accurate page count for the current machine.

I enclosed your image logic in this for-loop: "For i As Integer = 1 To objDoc.ActiveWindow.Panes(1).Pages.Count". The if-else directly following that line will select the first page in the first iteration, and then any subsequent additional pages thereafter. Everything else that follows is unchanged except for the save-filename.

Lastly, I just concatenated the page number into the image's save-path for obvious reasons...

I tested this on my own computer and it worked as intended, I hope this helps!

...Just an off-topic sidenote: I don't know if the code disposing of the Word handles just wasn't included in your question or if it's actually missing, but you may want to make sure you add it. The Interop classes love to leave Office processes running in the background, even after the program has closed, if they aren't disposed of properly; this example was leaving them open on my computer.