【问题标题】:Why passing char as parameter to islower() does not work correctly?为什么将 char 作为参数传递给 islower() 不能正常工作?
【发布时间】:2016-11-08 07:29:53
【问题描述】:

我的设置:glibc 2.24、gcc 6.2.0、UTF-8 环境。

当我们在下面的test.c中调用islower()时,结果打印正确:

#include <locale.h>
#include <ctype.h>
#include <stdio.h>
/* Reproduction case (test.c): exits with status 0 when islower() reports
   true for the integer value 0xff after the fr_FR.ISO-8859-1 locale has
   been requested, and with status 1 otherwise. */
int main (void)
{
  /* The return value is not checked, so a failed locale switch would go
     unnoticed here. */
  setlocale(LC_ALL, "fr_FR.ISO-8859-1");
  if (islower(0xff)) return 0;  /* argument is the plain int 255 */
  return 1;
}
$ gcc -g -o test test.c
$ ./test; echo $?
0

现在我们把0xff改成'ÿ',得到test2.c

#include <locale.h>
#include <ctype.h>
#include <stdio.h>
/* Reproduction case (test2.c): identical to test.c except that a character
   constant is passed to islower() instead of 0xff. Exits with status 0 when
   islower() reports true, and with status 1 otherwise. */
int main (void)
{
  /* The return value is not checked, so a failed locale switch would go
     unnoticed here. */
  setlocale(LC_ALL, "fr_FR.ISO-8859-1");
  /* NOTE(review): on a platform where plain char is signed, this constant
     (compiled with -fexec-charset=iso8859-1) evaluates to -1 rather than 255,
     so islower() is not asked about the byte 0xff. Passing
     (unsigned char)'ÿ' would yield 255 -- confirm for the target platform. */
  if (islower('ÿ')) return 0;
  return 1;
}

输出本应和 test.c 一样是 0,但实际结果却不同:

$ gcc -g -fexec-charset=iso8859-1 -o test2 test2.c
$ ./test2; echo $?
1

iso8859-1 语言环境已正确安装:

$ locale -a
C
C.UTF-8
en_US.utf8
french
fr_FR
fr_FR.iso88591
POSIX

strace 和 gdb 都没有透露任何有用的信息。

下面是 test 和 test2 的 strace 输出。


execve("./test", ["./test"], [/* 43 vars */]) = 0
brk(NULL)                               = 0x557a2ad1a000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f13c7e91000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=98552, ...}) = 0
mmap(NULL, 98552, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f13c7e78000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\3\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1685264, ...}) = 0
mmap(NULL, 3791264, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f13c78d4000
mprotect(0x7f13c7a69000, 2093056, PROT_NONE) = 0
mmap(0x7f13c7c68000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x194000) = 0x7f13c7c68000
mmap(0x7f13c7c6e000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f13c7c6e000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f13c7e76000
arch_prctl(ARCH_SET_FS, 0x7f13c7e76700) = 0
mprotect(0x7f13c7c68000, 16384, PROT_READ) = 0
mprotect(0x557a29abe000, 4096, PROT_READ) = 0
mprotect(0x7f13c7e94000, 4096, PROT_READ) = 0
munmap(0x7f13c7e78000, 98552)           = 0
brk(NULL)                               = 0x557a2ad1a000
brk(0x557a2ad3b000)                     = 0x557a2ad3b000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3517584, ...}) = 0
mmap(NULL, 3517584, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f13c7579000
close(3)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++
execve("./test2", ["./test2"], [/* 43 vars */]) = 0
brk(NULL)                               = 0x5603a66f6000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24d23c4000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=98552, ...}) = 0
mmap(NULL, 98552, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24d23ab000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\3\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1685264, ...}) = 0
mmap(NULL, 3791264, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24d1e07000
mprotect(0x7f24d1f9c000, 2093056, PROT_NONE) = 0
mmap(0x7f24d219b000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x194000) = 0x7f24d219b000
mmap(0x7f24d21a1000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24d21a1000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24d23a9000
arch_prctl(ARCH_SET_FS, 0x7f24d23a9700) = 0
mprotect(0x7f24d219b000, 16384, PROT_READ) = 0
mprotect(0x5603a560b000, 4096, PROT_READ) = 0
mprotect(0x7f24d23c7000, 4096, PROT_READ) = 0
munmap(0x7f24d23ab000, 98552)           = 0
brk(NULL)                               = 0x5603a66f6000
brk(0x5603a6717000)                     = 0x5603a6717000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3517584, ...}) = 0
mmap(NULL, 3517584, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24d1aac000
close(3)                                = 0
exit_group(1)                           = ?
+++ exited with 1 +++

为什么 test2 不返回 0?

【问题讨论】:

  • printf("%d", 'ÿ'); 会打印出什么?
  • @MichaelWalz 它打印-1,但是我应该如何在字符ÿ 上调用islower()?原来islower('a');是合法的,但islower('ÿ');不是?这是糟糕的设计。

标签: c type-conversion implicit-conversion


【解决方案1】:

问题归结为:

如果我这样写:

int x = 'ÿ';

x 将是 -1,但我希望它是 255。

它是 -1,因为 'ÿ' 是值为 0xff 的 char。但是由于 char 在您的平台上是有符号的,因此将其赋值给 int 时会发生符号扩展。

解决办法是这样写:

int x = (unsigned char)'ÿ';

所以不要写islower('ÿ'),而是写islower((unsigned char)'ÿ')

【讨论】:

  • 从这个答案可以看出 char 类型默认应该是无符号的,因为顾名思义,它是用来存储字符(character)的。你能想到有什么具体原因使 gcc 开发人员没有把 unsigned 作为 char 的默认设置吗?
  • @IgorLiferenko 看看这个 SO 问题(this SO question)。
猜你喜欢
  • 1970-01-01
  • 2016-07-15
  • 2016-08-16
  • 1970-01-01
  • 2016-09-30
  • 1970-01-01
  • 1970-01-01
  • 2012-04-04
相关资源
最近更新 更多