【问题标题】:Delphi 2009 - Strip non alpha numeric from stringDelphi 2009 - 从字符串中去除非字母数字
【发布时间】:2010-03-16 05:54:33
【问题描述】:

我有以下代码,需要去除所有非字母数字字符。它在 delphi 2009 中不起作用

unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;
Type
     TExplodeArray = Array Of String;

  TForm2 = class(TForm)
    Memo1: TMemo;
    ListBox1: TListBox;
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
    Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
    Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
    Function StripHTML ( S : String ) : String;
    function allwords(data:string):integer;
  end;

var
  Form2: TForm2;
  allword, allphrase: TExplodeArray;

implementation
{$R *.dfm}
Function TForm2.StripHTML ( S : String ) : String;
Var
     TagBegin, TagEnd, TagLength : Integer;
Begin
     TagBegin := Pos ( '<', S );      // search position of first <

     While ( TagBegin > 0 ) Do
          Begin  // while there is a < in S
          TagEnd := Pos ( '>', S );              // find the matching >
          TagLength := TagEnd - TagBegin + 1;
          Delete ( S, TagBegin, TagLength );     // delete the tag
          TagBegin := Pos ( '<', S );            // search for next <
          End;

     Result := S;                   // give the result
End;
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
     i : Integer;
Begin
     Result := '';
     For i := 0 To Length ( cArray ) - 1 Do
          Begin
          Result := Result + cSeparator + cArray [i];
          End;
     System.Delete ( Result, 1, Length ( cSeparator ) );
End;

Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
     i : Integer;
     S : String;
Begin
     S := vString;
     SetLength ( Result, 0 );
     i := 0;
     While Pos ( cSeparator, S ) > 0 Do
          Begin
          SetLength ( Result, Length ( Result ) + 1 );
          Result[i] := Copy ( S, 1, Pos ( cSeparator, S ) - 1 );
          Inc ( i );
          S := Copy ( S, Pos ( cSeparator, S ) + Length ( cSeparator ), Length ( S ) );
          End;
     SetLength ( Result, Length ( Result ) + 1 );
     Result[i] := Copy ( S, 1, Length ( S ) );
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
  Source, Dest: PChar;
begin
  SetLength(Result, Length(S));
  UniqueString(Result);
  Source := PChar(S);
  Dest := PChar(Result);
  while (Source <> nil) and (Source^ <> #0) do
  begin
    if Source^ in Chars then
    begin
      Dest^ := Source^;
      Inc(Dest);
    end;
    Inc(Source);
  end;
  SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> {greater than less than} #0 do begin
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
      DestPtr[0] := '\';
      DestPtr[1] := 't';
      Inc(SrcPtr);
      Inc(DestPtr);
    end else
      DestPtr[0] := SrcPtr[0];
    Inc(SrcPtr);
    Inc(DestPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> #0 do begin
    if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
      DestPtr[0] := SrcPtr[0];
      Inc(DestPtr);
    end;
    Inc(SrcPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
  listbox1.Items.add(data);
  data:= StripHTML ( data );
  listbox1.Items.add(data);
  //////////////////////////////////////////////////////////////
  data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
  // Strips out everything data comes back blank in Delphi 2009
  //////////////////////////////////////////////////////////////
  listbox1.Items.add(data);
  data := stringreplace(data,'  ',' ', [rfReplaceAll, rfIgnoreCase] );
  //Replace two spaces with one.
  listbox1.Items.add(data);
  allword:= explode(' ',data);
 { // Converting the following PHP code to Delphi
    $text = ereg_replace("[^[:alnum:]]", " ", $text);
    while(strpos($text,'  ')!==false) $text = ereg_replace("  ", " ", $text);
    $text=$string=strtolower($text);
    $text=explode(" ",$text);
    return count($text);
}
 for I := 0 to Length(allword) - 1 do
 listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]

allwords(memo1.Text);
end;

end.

我该怎么做呢?

【问题讨论】:

    标签: delphi delphi-2009 delphi-2010


    【解决方案1】:

    想到的最简单的解决方案是定义一个正则表达式,它返回输入字符串减去其中的任何非字母字符。

    【讨论】:

    • 是的,我打算将 RegEx [^[:alnum:]] 与 TPerlRegEx 一起使用,但我真的不知道如何正确使用它。 [^[:alnum:]] 会完美运行。
    • 好吧,我在 RegEx 中使用了你的建议,并在几个小时后想通了!
    【解决方案2】:

    我已经有一段时间没有使用 Delphi 做了很多事情了 - 版本 5 是我的游乐场。

    Delphi 2009 的主要功能之一不是它现在默认使用 Unicode。

    这对尝试逐字符处理的任何事物都有影响。会不会是你问题的根源?

    【讨论】:

      【解决方案3】:
      
      Uses StrUtils; //StuffString
      
      var
          Regex: TPerlRegEx;
        I:Integer;
      begin
      Regex := TPerlRegEx.Create(nil);
      Regex.RegEx := '[^[:alnum:]]';
      Regex.Options := [preMultiLine];
      Regex.Subject := data;
      if Regex.Match then begin
          repeat
          data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
          until not Regex.MatchAgain;
      end;
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 2018-10-21
        • 1970-01-01
        • 2011-09-13
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多