【问题标题】:How to convert ucs2 encoded file to either UTF-8 or UTF-16 or ANSI encoding format using java(如何使用 java 将 ucs2 编码文件转换为 UTF-8 或 UTF-16 或 ANSI 编码格式)
【发布时间】:2017-02-01 01:11:06
【问题描述】:

我得到的是 ucs-2 编码的 XML 文件。我想使用 java 代码将此编码转换为 UTF-8 或 UTF-16 或 ANSI。

你能帮忙吗?

【问题讨论】:

标签: java xml encoding utf-8 utf-16


【解决方案1】:

我必须做类似的事情,这就是我想出的(我删除了几个方法,但这对于您的用例来说应该足够了)。顺便说一句,据我所知,只要字节顺序相同,UCS-2 文件就可以当作 UTF-16 来读取(UCS-2 只覆盖基本多文种平面,是 UTF-16 的子集)。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;



/**
 * Character encodings known to the converter.
 *
 * <p>Each constant carries a fixed integer value; {@code TextConverter} uses
 * that value as the index into its {@code encodingNames} table, so the numbers
 * below must stay in step with the order of that table.
 */
enum EncodingType {
    UTF8(0),
    UTF16BE(1),
    UTF16LE(2),
    ISO_8859_1(3),
    ISO_8859_2(4),
    /** Placeholder for an encoding that could not be identified. */
    UNKNOWN(5);

    /** Integer value assigned to this constant at declaration. */
    private final int index;

    EncodingType(int index) {
        this.index = index;
    }

    /**
     * Returns the integer value assigned to this constant.
     *
     * @return the value passed to the constructor (0 for {@code UTF8} up to 5 for {@code UNKNOWN})
     */
    public int getIntValue() {
        return this.index;
    }
}

/**
 * Re-encodes text files: {@link #readFile(String)} decodes a file into a
 * {@code String} using the configured input encoding and
 * {@link #writeFile(String, String)} encodes a {@code String} into a file
 * using the configured output encoding.
 *
 * <p>Typical use: {@code setInputEncoding(UTF16LE)}, {@code setOutputEncoding(UTF8)},
 * then {@code writeFile(out, readFile(in))}.
 */
public class TextConverter{

    /** Not read or written by any method of this class; kept for existing callers. */
    public  EncodingType encodingType;
    /** Encoding used by {@link #readFile(String)}; UTF-8 unless changed. */
    private EncodingType inputEncoding = EncodingType.UTF8;
    /** Encoding used by {@link #writeFile(String, String)}; UTF-8 unless changed. */
    private EncodingType outputEncoding = EncodingType.UTF8;

    /** Java charset names, indexed by {@link EncodingType#getIntValue()}. */
    public final static String[] encodingNames = { "UTF-8","UTF-16BE","UTF-16LE", "ISO-8859-1","ISO-8859-2", "UNKNOWN" };

    // The check methods are only needed for guessing a file's encoding from its
    // leading bytes. Do not rely on them alone: not every encoding writes header
    // bytes (a byte-order mark), and a file's encoding can be changed without
    // adding one.

    /**
     * Tells whether {@code header} starts with the UTF-8 byte-order mark EF BB BF.
     *
     * @param header leading bytes of a file; may be null or shorter than three bytes
     * @return true only if the first three bytes are EF BB BF
     */
    private final static boolean checkUTF8(byte[] header){
        return header != null && header.length >= 3
                && (header[0]&0xFF)==0xEF && (header[1]&0xFF)==0xBB && (header[2]&0xFF)==0xBF;
    }

    /**
     * Tells whether {@code header} starts with the big-endian UTF-16 byte-order mark FE FF.
     *
     * @param header leading bytes of a file; may be null or shorter than two bytes
     * @return true only if the first two bytes are FE FF
     */
    private final static boolean checkUTF16BE(byte[] header){
        return header != null && header.length >= 2
                && (header[0]&0xFF)==0xFE && (header[1]&0xFF)==0xFF;
    }

    /**
     * Tells whether {@code header} starts with the little-endian UTF-16 byte-order mark FF FE.
     *
     * @param header leading bytes of a file; may be null or shorter than two bytes
     * @return true only if the first two bytes are FF FE
     */
    private final static boolean checkUTF16LE(byte[] header){
        // The second byte must be masked with 0xFF: masking with 0xFE would also
        // accept FF FF, which is not a byte-order mark.
        return header != null && header.length >= 2
                && (header[0]&0xFF)==0xFF && (header[1]&0xFF)==0xFE;
    }

    public EncodingType getInputEncoding(){
        return inputEncoding;
    }
    public EncodingType getOutputEncoding(){
        return outputEncoding;
    }
    public void setInputEncoding(EncodingType enc){
        this.inputEncoding = enc;
    }
    public void setOutputEncoding(EncodingType enc){
        this.outputEncoding = enc;
    }

    /**
     * Maps an encoding constant to a charset, failing before any file is touched.
     *
     * @param type encoding constant; must not be null
     * @return the charset named by {@link #encodingNames} for {@code type}
     * @throws java.io.UnsupportedEncodingException if the name is not supported by
     *         this JVM (always the case for {@code UNKNOWN})
     */
    private static Charset resolveCharset(EncodingType type) throws IOException{
        if (type == null) {
            throw new NullPointerException("encoding type must not be null");
        }
        String name = encodingNames[type.getIntValue()];
        if (!Charset.isSupported(name)) {
            throw new java.io.UnsupportedEncodingException(name);
        }
        return Charset.forName(name);
    }

    /**
     * Writes {@code content} to a file, encoded with the current output encoding
     * (see {@link #setOutputEncoding(EncodingType)}). An existing file is overwritten.
     *
     * @param fileName path of the file to create or overwrite; must not be null
     * @param content text to write; must not be null
     * @throws IOException if the output encoding is unsupported, or the file cannot
     *         be opened, written or closed. The encoding is checked before the file
     *         is opened, so an unsupported encoding leaves an existing file untouched.
     */
    public void writeFile(String fileName, String content)throws IOException{
        if (fileName == null) {
            throw new NullPointerException("fileName must not be null");
        }
        if (content == null) {
            throw new NullPointerException("content must not be null");
        }
        Charset charset = resolveCharset(outputEncoding);

        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(fileName)), charset));
        try {
            bw.write(content);
        } finally {
            // close() also flushes; a failure here is reported to the caller
            bw.close();
        }
    }

    /**
     * Reads a whole file into a string, decoding it with the current input encoding
     * (see {@link #setInputEncoding(EncodingType)}).
     *
     * <p>The file is read line by line and every line, including the last, is
     * terminated with the platform line separator. The original line endings are
     * therefore not preserved, and a trailing separator is added even if the file
     * did not end with one.
     *
     * <p>NOTE(review): a leading byte-order mark, if present, is presumably returned
     * as part of the text for the BOM-less charset names used here — confirm before
     * feeding the result to an XML parser.
     *
     * @param fileName path of the file to read; must not be null
     * @return the decoded content; empty string for an empty file
     * @throws IOException if the input encoding is unsupported, or the file cannot
     *         be opened, read or closed
     */
    public  String readFile(String fileName) throws IOException{
        if (fileName == null) {
            throw new NullPointerException("fileName must not be null");
        }
        Charset charset = resolveCharset(inputEncoding);
        String del = System.getProperty("line.separator");

        // StringBuilder keeps the read linear in the file size
        StringBuilder fileContent = new StringBuilder();
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(fileName)), charset));
        try {
            for (String line = br.readLine(); line != null; line = br.readLine())
            {
                fileContent.append(line).append(del);
            }
        } finally {
            br.close();
        }

        return fileContent.toString();
    }

}

您还可以将所有成员设为静态,因为这可能对您更有意义。当然,您可以以任何您认为合适的方式修改此代码。

【讨论】:

  • 您好,感谢您的回复。我收到 java.io.UnsupportedEncodingException: UCS-2LE 错误。我正在使用 java 6。解决此问题的其他解决方案是什么?
  • 您应该使用 UTF-16LE 或 UTF-16BE(如果您仔细观察我的代码,您会发现传给读写器的编码名称是字符串(在 encodingNames 中定义),而不是枚举 EncodingType 的名称)。
  • 您好,我将输入编码保留为 UTF-16LE,输出编码保留为 UTF-8。文件已生成,但如果在 notepad++ 中打开此文件,则编码仍为 UCS-2LE
  • 用某种十六进制编辑器检查输入和输出文件(看看它们是否不同)。您不能依赖 Notepad++ 或任何其他文本编辑器来可靠地确定编码。另外,尝试使用 UTF-8 在 Notepad++ 中打开生成的文件,看看内容是否正常。
猜你喜欢
  • 2012-06-30
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2014-02-17
  • 2011-11-20
  • 2011-11-07
  • 2017-05-10
  • 2013-09-26
相关资源
最近更新 更多