viernes, 30 de enero de 2015

Source code: CharsetTest

Código fuente del programa CharsetTest

Ver la entrada correspondiente en el siguiente enlace:


Conversión de Charset/Encoding en Java


package pruebas;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
 * Command-line utility and helper methods for experimenting with charset conversions.
 *
 * <p>All coders created by this class are configured with {@link CodingErrorAction#REPORT}, so
 * malformed input and unmappable characters raise an error instead of being replaced silently.
 */
public class CharsetTest {

    /**
     * Prints the canonical name of every charset returned by {@link Charset#availableCharsets()},
     * one per line.
     *
     * @param out          stream that receives the listing
     * @param aShowAliases when {@code true}, each name that has aliases is followed by
     *                     {@code ": "} and the aliases separated by {@code ", "}
     */
    public static void printAvailableCharsets(PrintStream out, boolean aShowAliases) {
        for (Map.Entry<String, Charset> entry : Charset.availableCharsets().entrySet()) {
            out.print(entry.getKey());

            if (aShowAliases) {
                //The first alias is introduced by ": ", the following ones by ", ".
                String separator= ": ";

                for (String alias : entry.getValue().aliases()) {
                    out.print(separator);
                    out.print(alias);
                    separator= ", ";
                }
            }

            out.println();
        }
    }
    //------------------------------------------------------------------------------------------

    /**
     * Encodes {@code input} into bytes with {@code inputCharset} and decodes those bytes again
     * with {@code outputCharset}.
     *
     * <p>The result is the text obtained when bytes written in {@code inputCharset} are read as
     * if they had been written in {@code outputCharset}.
     *
     * @param input         text to convert
     * @param inputCharset  charset used to turn {@code input} into bytes
     * @param outputCharset charset used to turn those bytes back into text
     * @return the decoded text
     * @throws IOException a {@link CharacterCodingException} when a character cannot be encoded
     *                     or the resulting bytes cannot be decoded
     */
    public static String doConversion(
        String input,
        Charset inputCharset,
        Charset outputCharset
    ) throws IOException {
        CharsetEncoder inEncoder= CharsetTest.newStrictEncoder(inputCharset);
        CharsetDecoder outDecoder= CharsetTest.newStrictDecoder(outputCharset);

        ByteBuffer encoded= inEncoder.encode( CharBuffer.wrap(input) );

        return outDecoder.decode(encoded).toString();
    }
    //------------------------------------------------------------------------------------------

    /**
     * Same as {@link #doConversion(String, Charset, Charset)}, looking the charsets up by name.
     *
     * <p>Example:
     * System.err.println( CharsetTest.doConversion("áéíóú",  "utf8", "ISO-8859-15") );
     *
     * @param input             text to convert
     * @param inputCharsetName  name of the charset used to turn {@code input} into bytes
     * @param outputCharsetName name of the charset used to turn those bytes back into text
     * @return the decoded text
     * @throws IOException      when a character cannot be encoded or decoded
     * @throws RuntimeException when a charset name is null, illegal or not supported; the
     *                          original exception is kept as the cause
     */
    public static String doConversion(
        String input,
        String inputCharsetName,
        String outputCharsetName
    ) throws IOException {
        Charset inputCharset= CharsetTest.lookupCharset(inputCharsetName);
        Charset outputCharset= CharsetTest.lookupCharset(outputCharsetName);

        return CharsetTest.doConversion(input, inputCharset, outputCharset);
    }
    //------------------------------------------------------------------------------------------

    /**
     * Wraps a coding error in an {@link IOException} whose message also reports how far the
     * conversion had progressed.
     *
     * @param ex            coding error to wrap; it is kept as the cause
     * @param consumedChars number of characters reported in the message
     * @param consumedBytes number of bytes reported in the message
     * @return the new exception; it is returned, not thrown
     */
    public static IOException wrapCharacterCodingException(
        CharacterCodingException ex,
        int consumedChars,
        int consumedBytes
    ) {
        //The arguments must follow the order of the placeholders: counts first, then the error.
        String msg= String.format("After successfully reading %s characters (%s bytes): %s: %s"
            , consumedChars
            , consumedBytes
            , ex.getClass().getSimpleName()
            , ex.getLocalizedMessage()
        );

        return new IOException(msg, ex);
    }
    //------------------------------------------------------------------------------------------

    /**
     * Reads text encoded in {@code inputCharset} from {@code inputStream} and writes it encoded
     * in {@code outputCharset} to {@code outputStream}.
     *
     * <p>The output stream is flushed at the end. Neither stream is closed by this method.
     *
     * @param inputStream   source of the bytes to convert
     * @param inputCharset  charset of the bytes read
     * @param outputStream  destination of the converted bytes
     * @param outputCharset charset of the bytes written
     * @throws IOException on I/O failure, or when the input is malformed or contains a character
     *                     that cannot be represented in {@code outputCharset}; in the latter
     *                     cases the message is built by {@link #wrapCharacterCodingException}
     *                     with the number of characters encoded and bytes written so far
     */
    public static void doConversion(
        InputStream inputStream,
        Charset inputCharset,
        OutputStream outputStream,
        Charset outputCharset
    ) throws IOException {
        CharsetDecoder inDecoder= CharsetTest.newStrictDecoder(inputCharset);
        CharsetEncoder outEncoder= CharsetTest.newStrictEncoder(outputCharset);

        InputStreamReader isR= new InputStreamReader(inputStream, inDecoder);
        //A one-character buffer keeps the reader from handing out characters in large batches,
        //so the character count reported on a decoding error stays close to the failure point.
        BufferedReader inBR= new BufferedReader(isR, 1);

        //Two chars: room for a high surrogate that is waiting for its low surrogate.
        CharBuffer charBuffer= CharBuffer.allocate(2);
        //Sized from the encoder itself; the previous fixed size of 4 bytes was too small for
        //encoders that emit a byte-order mark or an escape sequence together with a character.
        int byteCapacity= Math.max(16, (int) Math.ceil(outEncoder.maxBytesPerChar() * 2));
        ByteBuffer byteBuffer= ByteBuffer.allocate(byteCapacity);

        int consumedChars= 0;
        int consumedBytes= 0;

        try {
            int r;

            //The CharsetDecoder behind the reader may throw CharacterCodingException here.
            while ((r= inBR.read()) != -1) {
                charBuffer.put((char) r);
                charBuffer.flip();

                consumedBytes+= CharsetTest.encodeAndWrite(outEncoder, charBuffer, byteBuffer, outputStream, false);
                //position() is the number of chars the encoder actually consumed in this step.
                consumedChars+= charBuffer.position();

                //Keeps an unconsumed high surrogate so it is encoded together with the next char.
                charBuffer.compact();
            }

            //Signals end of input: a dangling high surrogate is reported instead of being lost.
            charBuffer.flip();
            consumedBytes+= CharsetTest.encodeAndWrite(outEncoder, charBuffer, byteBuffer, outputStream, true);
            consumedChars+= charBuffer.position();

            //Lets stateful encoders emit their final bytes.
            CharsetTest.flushAndWrite(outEncoder, byteBuffer, outputStream);
        }
        catch (CharacterCodingException ex) {
            throw CharsetTest.wrapCharacterCodingException(ex, consumedChars, consumedBytes);
        }

        outputStream.flush();
    }
    //------------------------------------------------------------------------------------------

    /**
     * Entry point. Expects three arguments: input file name, input charset name and output
     * charset name. The converted file is written to {@code System.out}.
     *
     * <p>With any other number of arguments the available charsets are listed on
     * {@code System.err}. Any failure prints its message on {@code System.err} and ends the
     * process with exit status 1.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        InputStream fis= null;
        PrintStream err= System.err;
        int exitCode= 0;

        try {
            if (args.length != 3) {
                CharsetTest.printAvailableCharsets(err, true);
                err.println();

                throw new RuntimeException("Usage: intputFileName inputCharsetName outputCharsetName");
            }

            String inputFileName= args[0];
            Charset inputCharset= CharsetTest.lookupCharset(args[1]);
            Charset outputCharset= CharsetTest.lookupCharset(args[2]);

            try {
                fis= new FileInputStream(inputFileName);
            }
            catch (FileNotFoundException ex) {
                String msg= String.format("%s: %s", ex.getClass().getSimpleName(), ex.getLocalizedMessage());
                throw new RuntimeException(msg, ex);
            }

            CharsetTest.doConversion(fis, inputCharset, System.out, outputCharset);
        }
        catch (Exception ex) {
            err.println( ex.getMessage() );
            //The exit is deferred until after the finally block so the file still gets closed.
            exitCode= 1;
        }
        finally {
            if (fis != null) {
                try {
                    fis.close();
                }
                catch (IOException ex) {
                    //The error is logged and otherwise ignored.
                    Logger.getLogger(CharsetTest.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }
    //------------------------------------------------------------------------------------------

    /** Creates a decoder for {@code charset} that reports every conversion problem as an error. */
    private static CharsetDecoder newStrictDecoder(Charset charset) {
        return charset.newDecoder()
            .onMalformedInput( CodingErrorAction.REPORT )
            .onUnmappableCharacter( CodingErrorAction.REPORT );
    }
    //------------------------------------------------------------------------------------------

    /** Creates an encoder for {@code charset} that reports every conversion problem as an error. */
    private static CharsetEncoder newStrictEncoder(Charset charset) {
        return charset.newEncoder()
            .onMalformedInput( CodingErrorAction.REPORT )
            .onUnmappableCharacter( CodingErrorAction.REPORT );
    }
    //------------------------------------------------------------------------------------------

    /** Resolves a charset by name, turning lookup failures into a RuntimeException with a readable message. */
    private static Charset lookupCharset(String charsetName) {
        try {
            return Charset.forName(charsetName);
        }
        catch (IllegalArgumentException ex) {
            //Covers null, illegal and unsupported names: all are IllegalArgumentException subtypes.
            String msg= String.format("%s: %s", ex.getClass().getSimpleName(), ex.getLocalizedMessage());
            throw new RuntimeException(msg, ex);
        }
    }
    //------------------------------------------------------------------------------------------

    /** Encodes the remaining chars of {@code in} and writes the produced bytes; returns the number of bytes written. */
    private static int encodeAndWrite(
        CharsetEncoder encoder,
        CharBuffer in,
        ByteBuffer out,
        OutputStream sink,
        boolean endOfInput
    ) throws IOException {
        int written= 0;
        CoderResult cr;

        do {
            cr= encoder.encode(in, out, endOfInput);
            if (cr.isError()) {
                cr.throwException();
            }

            written+= CharsetTest.drain(out, sink);
            //On overflow the output buffer was full: it has been emptied, so encoding resumes.
        } while (cr.isOverflow());

        return written;
    }
    //------------------------------------------------------------------------------------------

    /** Flushes the encoder's pending output and writes it; returns the number of bytes written. */
    private static int flushAndWrite(
        CharsetEncoder encoder,
        ByteBuffer out,
        OutputStream sink
    ) throws IOException {
        int written= 0;
        CoderResult cr;

        do {
            cr= encoder.flush(out);
            written+= CharsetTest.drain(out, sink);
        } while (cr.isOverflow());

        return written;
    }
    //------------------------------------------------------------------------------------------

    /** Writes the bytes accumulated in {@code buffer} to {@code sink} and empties the buffer; returns the byte count. */
    private static int drain(ByteBuffer buffer, OutputStream sink) throws IOException {
        buffer.flip();
        int count= buffer.remaining();

        if (count > 0) {
            sink.write(buffer.array(), buffer.arrayOffset() + buffer.position(), count);
        }

        buffer.clear();

        return count;
    }
    //------------------------------------------------------------------------------------------

}

No hay comentarios:

Publicar un comentario