【问题标题】:Delphi 2010 : How to emulate the Delphi XE TStrings.Encoding property?Delphi 2010:如何模拟 Delphi XE TStrings.Encoding 属性?
【发布时间】:2017-08-12 12:34:52
【问题描述】:

Delphi XE 在TStrings 类中添加了一个Encoding 属性,该属性存储在调用LoadFromFile() 时从BOM 读取的编码。

Delphi 2010 没有这个属性。 我想效仿它。

我为TStrings 创建了下面的类助手。 助手工作,但要获得文件的 BOM,我发现的唯一解决方案是在 FileStream 中重新加载相同的文件。我想避免这种情况,因为TStrings.LoadFromFile() 已经得到了 BOM。

我如何告诉助手重新使用已经找到的 BOM?

unit TestEncodingName_00;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, ExtDlgs;

type
  { Main form of the sample: Memo1 displays the loaded text file followed
    by the name of the encoding detected for it. }
  TForm1 = class(TForm)
    Memo1: TMemo;
    procedure FormCreate(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

{ Class helper that emulates the TStrings.Encoding property introduced in
  Delphi XE by reporting the encoding name derived from a file's BOM. }
TMyStrings = class helper for TStrings
private
  function GetEncodingName(fPath: string): string;
public
  { Name of the encoding detected for the file located at fPath. }
  property EncodingName[fPath: string]: string read GetEncodingName;
end;

var
Form1: TForm1;

implementation

{$R *.dfm}

{ Returns the name of the encoding announced by the byte-order mark (BOM)
  at the start of the file fPath:
    'Unicode'          - UTF-16 little-endian BOM
    'BigEndianUnicode' - UTF-16 big-endian BOM
    'UTF8'             - UTF-8 BOM
    'Default'          - no recognised BOM (or the file is empty)
  The file is opened independently of any earlier LoadFromFile call.
  An exception is raised by TFileStream if the file cannot be opened. }
function TMyStrings.GetEncodingName(fPath: string): string;
const
  // Number of leading bytes inspected when looking for a BOM.
  ProbeLen = 4;
var
  Prefix: TBytes;
  BytesRead: Integer;
  Detected: TEncoding;
  Source: TFileStream;
begin
  // fmShareDenyWrite allows other readers to keep the file open; plain
  // fmOpenRead requests exclusive access and fails if anyone else has it.
  Source := TFileStream.Create(fPath, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(Prefix, ProbeLen);
    BytesRead := Source.Read(Prefix[0], ProbeLen);
  finally
    Source.Free;
  end;

  // Shrink the buffer so only bytes actually read are examined.
  if BytesRead < ProbeLen then
    SetLength(Prefix, BytesRead);

  // Passing nil asks GetBufferEncoding to pick the encoding from the BOM.
  Detected := nil;
  TEncoding.GetBufferEncoding(Prefix, Detected);

  if Detected = TEncoding.Unicode then
    Result := 'Unicode'
  else if Detected = TEncoding.BigEndianUnicode then
    Result := 'BigEndianUnicode'
  else if Detected = TEncoding.UTF8 then
    Result := 'UTF8'
  else
    Result := 'Default';
end;

{ Loads the sample file into Memo1 and appends a line naming the encoding
  that the TStrings helper detects for that same file. }
procedure TForm1.FormCreate(Sender: TObject);
const
  SampleFile = 'Sample UTF8.txt';
begin
(* sample UTF8.txt
Ā ā Ă ă
Ρ Σ Τ Υ
ぁ あ ぃ
*)
  Memo1.Lines.LoadFromFile(SampleFile);
  // At this point TStrings has already seen the BOM, but Delphi 2010 does
  // not expose it, so the helper has to open the same file a second time
  // to look at the BOM again.
  Memo1.Lines.Add(#13#10'Encoding : ' + Memo1.Lines.EncodingName[SampleFile]);
end;
end. 

【问题讨论】:

    标签: delphi delphi-2010


    【解决方案1】:

    首先,发现 BOM 编码的是 LoadFromStream(),而不是 LoadFromFile()。LoadFromFile() 只是将文件打开为 TFileStream,然后调用 LoadFromStream()。

    在 Delphi(2009 和)2010 中,发现的 BOM 编码不会存储在您可以访问的任何位置。这正是 XE 通过添加新的 Encoding 属性解决的问题。该编码仅在解析前将文件数据解码为UnicodeString 时用作LoadFromStream() 内部的局部变量,然后在LoadFromStream() 退出时将其丢弃。您无法改变这种行为。

    因此,唯一的解决方案是手动加载文件,以便您可以发现其 BOM。理想情况下,您将在后代类中覆盖 LoadFromStream(),但不能使 TMemo.Lines 使用自定义类。 class helper 也不能覆盖虚拟方法。

    但是,您可以从 TStringList 派生一个自定义类来覆盖 LoadFromStream(),自己加载文件,然后通过 Assign() 将该 TStringList 赋给 TMemo.Lines。例如:

    unit TestEncodingName_00;
    
    interface
    
    uses
      Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
      Dialogs, StdCtrls, ExtDlgs;
    
    type
      { Main form of the sample. FormCreate fills Memo1 with the contents of
        the sample file and the name of its detected encoding. }
      TForm1 = class(TForm)
        Memo1: TMemo;
        procedure FormCreate(Sender: TObject);
      private
        { Private declarations }
      public
        { Public declarations }
      end;
    
      { TStringList descendant that remembers which TEncoding was used the
        last time its contents were loaded from a stream, emulating the
        TStrings.Encoding property that Delphi XE added. }
      TMyStringList = class(TStringList)
      private
        // Encoding recorded by LoadFromStream; nil until a load has happened.
        fEncoding: TEncoding;
      public
        { The single-parameter LoadFromStream(Stream: TStream) simply
        calls LoadFromStream(Stream: TStream; Encoding: TEncoding) with
        the Encoding parameter set to nil, so you only have to override
        that version of LoadFromStream()... }
        procedure LoadFromStream(Stream: TStream; Encoding: TEncoding); override;
        // Read-only access to the encoding recorded by LoadFromStream.
        property Encoding: TEncoding read fEncoding;
      end;
    
    var
      Form1: TForm1;
    
    implementation
    
    {$R *.dfm}
    
    { Replaces the list contents with the text read from Stream, starting at
      the stream's current position and running to its end, and records the
      encoding that was used in the Encoding property.

      Stream   - source of the raw bytes.
      Encoding - encoding to decode with; pass nil to have it detected from
                 the BOM via TEncoding.GetBufferEncoding.

      Raises EReadError if the stream delivers fewer bytes than it reported.
      The Encoding property is only updated once decoding has succeeded, so
      after a failed load it still describes the list's existing contents. }
    procedure TMyStringList.LoadFromStream(Stream: TStream; Encoding: TEncoding);
    var
      ByteCount: Integer;
      BomLen: Integer;
      Buffer: TBytes;
    begin
      { this is similar to the implementation that LoadFromStream()
      uses in XE+, but with some differences:

      1. the Encoding property is assigned a bit differently, as XE+
         utilizes a TEncoding.Clone() method when the specified Encoding
         is not a standard RTL encoding (ie, is a custom user class), but
         Clone() does not exist in D2009/2010.

      2. XE+ has a TStrings.DefaultEncoding property, which is passed
         to TEncoding.GetBufferEncoding() as the default to return if no
         BOM is detected, but that property and parameter do not exist in
         D2009/2010.

      3. TStrings.LoadFromStream() does not check if Size is 0 (file is empty)
         before dereferencing the Buffer that is passed to Stream.Read().
         That is a runtime crash waiting to happen! }

      BeginUpdate;
      try
        ByteCount := Stream.Size - Stream.Position;
        SetLength(Buffer, ByteCount);
        // Buffer[0] must not be touched when the buffer is empty.
        // ReadBuffer (rather than Read) raises on a short read instead of
        // letting a partially filled buffer be decoded silently.
        if ByteCount > 0 then
          Stream.ReadBuffer(Buffer[0], ByteCount);

        // Returns the BOM length to skip; fills in Encoding when it is nil.
        BomLen := TEncoding.GetBufferEncoding(Buffer, Encoding);
        SetTextStr(Encoding.GetString(Buffer, BomLen, Length(Buffer) - BomLen));

        // Record the encoding only after the text was decoded and stored.
        fEncoding := Encoding;
      finally
        EndUpdate;
      end;
    end;
    
    { Loads the sample file through TMyStringList so the detected encoding
      is available, copies the lines into Memo1, and appends a blank line
      followed by the name of that encoding. }
    procedure TForm1.FormCreate(Sender: TObject);
    var
      EncName : string;
      List: TMyStringList;
    begin
      List := TMyStringList.Create;
      try
        List.LoadFromFile('Sample UTF8.txt');
        if List.Encoding = TEncoding.Unicode then
          EncName := 'Unicode'
        else if List.Encoding = TEncoding.BigEndianUnicode then
          // GetBufferEncoding also recognises the UTF-16 big-endian BOM.
          EncName := 'BigEndianUnicode'
        else if List.Encoding = TEncoding.UTF8 then
          EncName := 'UTF8'
        else
          EncName := 'Default';

        Memo1.Lines.Assign(List);
        // TStrings.Add requires a string argument; pass '' for a blank line.
        Memo1.Lines.Add('');
        Memo1.Lines.Add('Encoding : ' + EncName);
      finally
        List.Free;
      end;
    end;
    
    end. 
    

    【讨论】:

    • 非常感谢您的精彩回答,并编辑了我的帖子 :-) 当我再次阅读它以及您的回答时,我不记得我写了这么好的英语!
    猜你喜欢
    • 2010-12-18
    • 2023-03-22
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2011-05-09
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多