Maks150988
Advanced Member | Редактировать | Профиль | Сообщение | Цитировать | Сообщить модератору
Код:
unit F_StrUtils;

// Small WinAPI-only helpers for detecting byte-order marks and converting
// UTF-8 / UTF-16 text.

interface

uses
  Windows;

{function Is_US_ASCII_Ansi(Buf: Pointer; Size: Integer): Boolean;}
{function Is_US_ASCII_Wide(Buf: Pointer; Size: Integer): Boolean;}

// True when the buffer starts with the UTF-8 BOM (bytes EF BB BF).
function Is_UTF_8_BOM(Buf: Pointer; Size: Integer): Boolean;
// True when the first 16-bit unit, read natively, equals $FEFF
// (Notepad "Unicode"). See the naming note in the implementation.
function Is_UTF_16_BE_BOM(Buf: Pointer; Size: Integer): Boolean;
// True when the first 16-bit unit, read natively, equals $FFFE
// (Notepad "Unicode big endian"). See the naming note in the implementation.
function Is_UTF_16_LE_BOM(Buf: Pointer; Size: Integer): Boolean;
// Swaps the two bytes of one UTF-16 code unit.
function SwapUTF16Endian(P: WideChar): WideChar;
// Converts UTF-8 text to the ANSI code page, passing undecodable bytes through.
function UTF8ToStrSmart(s: AnsiString): AnsiString;

implementation

{ Disabled helper kept for reference: reports whether every byte of an ANSI
  buffer is below $80.

function Is_US_ASCII_Ansi(Buf: Pointer; Size: Integer): Boolean;
var
  I: Integer;
  P: PAnsiChar;
begin
  P := Buf;
  for I := 1 to Size do
    if (Ord(P^) >= $80) then
    begin
      Result := FALSE;
      Exit;
    end
    else
      Inc(P);
  Result := TRUE;
end;}

{ Disabled helper kept for reference: same check for a buffer of WideChars.

function Is_US_ASCII_Wide(Buf: Pointer; Size: Integer): Boolean;
var
  I: Integer;
  P: PWideChar;
begin
  P := Buf;
  for I := 1 to Size do
    if (Ord(P^) >= $80) then
    begin
      Result := FALSE;
      Exit;
    end
    else
      Inc(P);
  Result := TRUE;
end;}

// Returns TRUE when Buf is non-nil, at least 3 bytes long and begins with
// the UTF-8 byte-order mark EF BB BF.
//   Buf  - start of the raw data (may be nil)
//   Size - number of readable bytes at Buf
function Is_UTF_8_BOM(Buf: Pointer; Size: Integer): Boolean;
var
  P: PAnsiChar;
begin
  P := PAnsiChar(Buf);
  // Short-circuit evaluation keeps the byte reads behind the nil/size checks.
  Result := Assigned(P) and (Size >= 3) and
    (P[0] = #$EF) and (P[1] = #$BB) and (P[2] = #$BF);
end;

// Returns TRUE when the first WideChar of Buf, read in native byte order,
// is $FEFF.
// NOTE: on a little-endian machine that means the bytes in memory are FF FE,
// which is the UTF-16 *little-endian* BOM. The "BE" in the name is therefore
// the opposite of the usual terminology; name and behaviour are kept as they
// are because existing callers rely on them.
function Is_UTF_16_BE_BOM(Buf: Pointer; Size: Integer): Boolean;
begin
  Result := Assigned(Buf) and (Size >= SizeOf(WideChar)) and
    (PWideChar(Buf)^ = WideChar($FEFF));
end;

// Returns TRUE when the first WideChar of Buf, read in native byte order,
// is $FFFE (the byte-swapped BOM).
// NOTE: on a little-endian machine that means the bytes in memory are FE FF,
// i.e. the UTF-16 *big-endian* BOM; see the note on Is_UTF_16_BE_BOM.
function Is_UTF_16_LE_BOM(Buf: Pointer; Size: Integer): Boolean;
begin
  Result := Assigned(Buf) and (Size >= SizeOf(WideChar)) and
    (PWideChar(Buf)^ = WideChar($FFFE));
end;

// Returns P with its low and high bytes exchanged.
function SwapUTF16Endian(P: WideChar): WideChar;
begin
  Result := WideChar(((Ord(P) and $FF) shl 8) or (Ord(P) shr 8));
end;

// Converts UTF-8 encoded text to an ANSI string, one encoded character at a
// time.
//   - Bytes $00..$7F are copied unchanged.
//   - A lead byte starts a sequence whose length (2..6) is taken from its
//     high bits; the sequence is decoded with MultiByteToWideChar(CP_UTF8)
//     and converted with WideCharToString.
//   - Bytes that cannot start a sequence ($80..$BF), a sequence that is cut
//     off by the end of the string, and a sequence the API refuses to decode
//     are copied unchanged instead of being dropped.
function UTF8ToStrSmart(s: AnsiString): AnsiString;
var
  Idx: Integer;
  Total: Integer;
  SeqLen: Integer;
  Count: Integer;
  Lead: Byte;
  // Up to 6 input bytes can yield at most 6 code units; the extra slots
  // guarantee a zero terminator for WideCharToString.
  WBuf: array[0..7] of WideChar;
begin
  Result := '';
  Total := Length(s);
  Idx := 1;
  while Idx <= Total do
  begin
    Lead := Ord(s[Idx]);

    // Sequence length from the lead byte (most specific mask first).
    if Lead < $80 then
      SeqLen := 1                       // plain ASCII
    else if (Lead and $FC) = $FC then
      SeqLen := 6
    else if (Lead and $F8) = $F8 then
      SeqLen := 5
    else if (Lead and $F0) = $F0 then
      SeqLen := 4
    else if (Lead and $E0) = $E0 then
      SeqLen := 3
    else if (Lead and $C0) = $C0 then
      SeqLen := 2
    else
      SeqLen := 1;                      // stray continuation byte

    if SeqLen = 1 then
    begin
      Result := Result + s[Idx];
      Inc(Idx);
      Continue;
    end;

    // Sequence runs past the end of the input: keep the remaining bytes.
    if Idx + SeqLen - 1 > Total then
    begin
      Result := Result + Copy(s, Idx, Total - Idx + 1);
      Break;
    end;

    ZeroMemory(@WBuf, SizeOf(WBuf));
    // The last argument is a capacity in WideChars (not bytes); High(WBuf)
    // leaves the final slot untouched as terminator.
    Count := MultiByteToWideChar(CP_UTF8, 0, @s[Idx], SeqLen,
      @WBuf[0], High(WBuf));
    if Count > 0 then
      Result := Result + WideCharToString(@WBuf[0])
    else
      Result := Result + Copy(s, Idx, SeqLen);

    Inc(Idx, SeqLen);
  end;
end;

end.
| Код:
// Converts a zero-terminated raw text buffer into a WideString, choosing the
// decoder from the byte-order mark at the start of the buffer.
//   pMem - start of the data. The function has no size parameter, so the
//          buffer must hold at least BOM_PROBE_SIZE readable bytes and be
//          terminated with two zero bytes (the UTF-16 branches stop at the
//          first zero WideChar, the others at the first zero byte).
// Returns the decoded text without the BOM; '' when pMem is nil.
function ExtractMemoryBufferString(pMem: Pointer): WideString;
const
  // Bytes made available to the BOM checks; the longest BOM tested is 3 bytes.
  BOM_PROBE_SIZE = 4;
  UTF8_BOM_LEN = 3;
var
  a: AnsiString;
  w: WideString;
  i: Integer;
  n: Integer;
begin
  // A managed function result is not guaranteed to be empty on entry.
  Result := '';
  if not Assigned(pMem) then
    Exit;

  // We need to turn the opened text document into a usable form: inspect the
  // raw data and re-encode the text accordingly. We assume the user saved the
  // text with Notepad, so the possible encodings are ANSI, UTF-8, Unicode and
  // Unicode big endian.

  // "Unicode" encoding: code units are already in native order.
  if Is_UTF_16_BE_BOM(pMem, BOM_PROBE_SIZE) then
  begin
    w := PWideChar(pMem);
    Delete(w, 1, 1);                    // the BOM is exactly one WideChar
    Result := w;
  end
  else
  // "Unicode big endian" encoding: every code unit must be byte-swapped.
  if Is_UTF_16_LE_BOM(pMem, BOM_PROBE_SIZE) then
  begin
    w := PWideChar(pMem);
    Delete(w, 1, 1);                    // the BOM is exactly one WideChar
    for i := 1 to Length(w) do
      w[i] := SwapUTF16Endian(w[i]);
    Result := w;
  end
  else
  // UTF-8 encoding: decode straight to UTF-16 so that characters outside the
  // ANSI code page are not lost.
  if Is_UTF_8_BOM(pMem, BOM_PROBE_SIZE) then
  begin
    a := PAnsiChar(pMem);
    Delete(a, 1, UTF8_BOM_LEN);
    if a <> '' then
    begin
      // First call only measures the required number of WideChars.
      n := MultiByteToWideChar(CP_UTF8, 0, PAnsiChar(a), Length(a), nil, 0);
      if n > 0 then
      begin
        SetLength(Result, n);
        MultiByteToWideChar(CP_UTF8, 0, PAnsiChar(a), Length(a),
          PWideChar(Result), n);
      end;
    end;
  end
  else
  // Anything else is treated as ANSI text.
  begin
    a := PAnsiChar(pMem);
    // AnsiToWide and GetLocalePage are defined elsewhere in the project.
    Result := AnsiToWide(a, GetLocalePage);
  end;
end;
| Код:
// Caller fragment: declarations (inside the caller's var section) followed by
// the statements that load at most the first 65534 bytes of the selected file
// and decode them into sMem.
hFile : THandle;
hMem  : HGLOBAL;
pMem  : Pointer;
dwRead: DWORD;
sMem  : WideString;

// The handle is only read from and closed again, so read access is enough
// (asking for write access would fail on read-only files).
hFile := CreateFileW(
  ofn.lpstrFile,
  GENERIC_READ,
  FILE_SHARE_READ or FILE_SHARE_WRITE,
  nil,
  OPEN_EXISTING,
  FILE_ATTRIBUTE_NORMAL,
  0
);
if (hFile <> INVALID_HANDLE_VALUE) then
try
  // Zero-filled buffer; reading two bytes less than its size always leaves a
  // full zero WideChar as terminator for ExtractMemoryBufferString.
  hMem := GlobalAlloc(GMEM_MOVEABLE or GMEM_ZEROINIT, 65536);
  if hMem <> 0 then
  try
    pMem := GlobalLock(hMem);
    if Assigned(pMem) then
    try
      if ReadFile(hFile, pMem^, 65536 - SizeOf(WideChar), dwRead, nil) then
        sMem := ExtractMemoryBufferString(pMem);
    finally
      // GlobalUnlock takes the memory handle, not the locked pointer.
      GlobalUnlock(hMem);
    end;
  finally
    GlobalFree(hMem);
  end;
finally
  CloseHandle(hFile);
end;
| |