2011年1月8日土曜日

Wordで2バイト文字のチェック

Wordデータを英語環境に持って行くと2バイトフォントが使われている箇所を探せないかという話があったのでいい方法はないかな?

下のサンプルは少々強引だが・・・選択範囲に使用している文字のフォント名を取得してWin32APIを使用してフォントが2バイト(日中韓)フォントかの確認をしています。
当然OSに無いフォントがある場合は正常に動作しません。たぶん・・・。

簡単に説明するとCharacter.Font.Nameが実際に表示されているフォントでCharacter.Font.ASCIIが欧文フォント、Character.Font.NameFarEastが日中韓フォントのどれかになります。
他にもアラビア語用とかも選択できるみたいだがめんどくさいので割愛ということで。

Option Explicit
Private Const LF_FACESIZE       As Long = 32&
Private Const LF_FULLFACESIZE   As Long = 64&

Private Const DEVICE_FONTTYPE   As Long = &H2&
Private Const RASTER_FONTTYPE   As Long = &H1&
Private Const TRUETYPE_FONTTYPE As Long = &H4&

Private Type LOGFONT
    lfHeight As Long
    lfWidth As Long
    lfEscapement As Long
    lfOrientation As Long
    lfWeight As Long
    lfItalic As Byte
    lfUnderline As Byte
    lfStrikeOut As Byte
    lfCharSet As Byte
    lfOutPrecision As Byte
    lfClipPrecision As Byte
    lfQuality As Byte
    lfPitchAndFamily As Byte
    lfFaceName(LF_FACESIZE - 1&) As Byte
End Type

Type NEWTEXTMETRIC
    tmHeight As Long
    tmAscent As Long
    tmDescent As Long
    tmInternalLeading As Long
    tmExternalLeading As Long
    tmAveCharWidth As Long
    tmMaxCharWidth As Long
    tmWeight As Long
    tmOverhang As Long
    tmDigitizedAspectX As Long
    tmDigitizedAspectY As Long
    tmFirstChar As Byte
    tmLastChar As Byte
    tmDefaultChar As Byte
    tmBreakChar As Byte
    tmItalic As Byte
    tmUnderlined As Byte
    tmStruckOut As Byte
    tmPitchAndFamily As Byte
    tmCharSet As Byte
    ntmFlags As Long
    ntmSizeEM As Long
    ntmCellHeight As Long
    ntmAveWidth As Long
End Type

Private Const ANSI_CHARSET        As Byte = 0&
Private Const BALTIC_CHARSET      As Byte = 186&
Private Const CHINESEBIG5_CHARSET As Byte = 136&
Private Const DEFAULT_CHARSET     As Byte = 1&
Private Const EASTEUROPE_CHARSET  As Byte = 238&
Private Const GB2312_CHARSET      As Byte = 134&
Private Const GREEK_CHARSET       As Byte = 161&
Private Const HANGUL_CHARSET      As Byte = 129&
Private Const MAC_CHARSET         As Byte = 77&
Private Const OEM_CHARSET         As Byte = 255&
Private Const RUSSIAN_CHARSET     As Byte = 204&
Private Const SHIFTJIS_CHARSET    As Byte = 128&
Private Const SYMBOL_CHARSET      As Byte = 2&
Private Const TURKISH_CHARSET     As Byte = 162&
Private Const VIETNAMESE_CHARSET  As Byte = 163&

Private IsCJK As Boolean                    ' CJKフォントかの確認

Private Declare Function EnumFontFamiliesEx Lib "gdi32" Alias "EnumFontFamiliesExA" ( _
    ByVal hdc As Long, _
    lpLogFont As LOGFONT, _
    ByVal lpEnumFontProc As Long, _
    ByVal LParam As Long, _
    ByVal dw As Long _
) As Long

Private Declare Sub MoveMemory Lib "kernel32" Alias "RtlMoveMemory" ( _
    Destination As Any, _
    Source As Any, _
    ByVal Length As Long _
)

Private Declare Function GetDC Lib "user32" ( _
    ByVal hwnd As Long _
) As Long

Public Sub CheckFarEastFont()
' フォントが日中韓フォントかの確認
    Dim wd_Selection As Word.Selection
    Dim i As Long
    
    Set wd_Selection = Application.Selection
    
    For i = 1 To wd_Selection.Characters.Count
        With wd_Selection.Characters(i)
            If .Font.Name = .Font.NameFarEast And .Font.Name <> "" Then
                Call FontCheck(.Font)
            End If
        End With
    Next
End Sub

Private Sub FontCheck(wd_Font As Word.Font)
' フォントが日中韓のエンコードを所持しているかの確認
    Dim bytBuf()    As Byte
    Dim udtLogFont  As LOGFONT
    
    bytBuf = StrConv(wd_Font.Name, vbFromUnicode)
    With udtLogFont
        .lfCharSet = DEFAULT_CHARSET
        Call MoveMemory(.lfFaceName(0), bytBuf(0), UBound(bytBuf) + 1&)
        .lfPitchAndFamily = 0&
    End With
    
    IsCJK = False
    Call EnumFontFamiliesEx(GetDC(0), udtLogFont, AddressOf EnumFontFamProc, ByVal 0&, 0)
    If IsCJK Then
        wd_Font.Animation = wdAnimationMarchingRedAnts    ' CJKフォントの場合赤い線のアニメーション
    Else
        wd_Font.Animation = wdAnimationNone
    End If
End Sub

Private Function EnumFontFamProc(lpNLF As LOGFONT, lpNTM As NEWTEXTMETRIC, ByVal FontType As Long, LParam As Long) As Long
    Dim FaceName As String
    
    FaceName = StrConv(lpNLF.lfFaceName, vbUnicode)
    Debug.Print Left$(FaceName, InStr(FaceName, vbNullChar) - 1)    ' フォント名
    EnumFontFamProc = 1
    Select Case lpNLF.lfCharSet
    Case SHIFTJIS_CHARSET, GB2312_CHARSET, CHINESEBIG5_CHARSET, HANGUL_CHARSET:
        IsCJK = True
    End Select
End Function

しかし、Unicodeの時代なのに未だに2バイト文字というのもなんだかなぁ。

0 件のコメント:

コメントを投稿