// Replace disallowed control bytes and malformed UTF-8 byte sequences with '?'.
// Rejected here: overlong 2-byte forms, every 4-byte (or longer) sequence, i.e.
// code points U+10000 and above, stray continuation bytes, and 2-/3-byte lead
// bytes followed by the wrong number of continuation bytes.
//
// The pattern deliberately has no /u modifier: it must match raw bytes because
// the subject is not yet known to be valid UTF-8. The pieces are single-quoted
// so that the \xHH escapes reach PCRE verbatim (they had lost their backslashes,
// which made the classes match ordinary letters and digits).
$malformed_utf8_pattern =
    // Control bytes. \x09, \x0A and \x0D are not part of this class.
    // NOTE(review): \x10 is already inside \x0E-\x19, and \x1A-\x1F are not
    // matched - confirm whether \x0E-\x1F was intended.
    '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'
    // An ASCII byte directly followed by one or more continuation bytes
    // (the ASCII byte is replaced together with them).
    . '|[\x00-\x7F][\x80-\xBF]+'
    // Overlong 2-byte lead bytes (C0, C1) and lead bytes F0-FF, plus any
    // continuation bytes trailing them.
    . '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'
    // 2-byte lead byte followed by no continuation byte, or by two or more.
    . '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'
    // 3-byte lead byte followed by exactly one continuation byte, by fewer
    // than two, or by three or more.
    . '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';

$some_string = preg_replace($malformed_utf8_pattern, '?', $some_string);

// Replace overlong 3-byte sequences and UTF-16 surrogates with '?'.
//   \xE0 [\x80-\x9F] [\x80-\xBF]  - 3-byte encodings of code points below U+0800
//   \xED [\xA0-\xBF] [\x80-\xBF]  - encodings of the surrogate range U+D800-U+DFFF
// The pattern has no /u modifier so that it matches raw bytes, and it is
// single-quoted so the \xHH escapes reach PCRE verbatim (without the
// backslashes it only matched the literal text "xE0"/"xED").
$overlong_or_surrogate_pattern =
    '/\xE0[\x80-\x9F][\x80-\xBF]'
    . '|\xED[\xA0-\xBF][\x80-\xBF]/S';

$some_string = preg_replace($overlong_or_surrogate_pattern, '?', $some_string);

 

相关文章:

  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-08-30
猜你喜欢
  • 2021-06-17
  • 2022-12-23
  • 2021-12-30
  • 2021-08-25
  • 2022-12-23
相关资源
相似解决方案