Código fuente del programa CharsetTest
Ver la entrada correspondiente en el siguiente enlace:
Conversión de Charset/Encoding en Java
package pruebas;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Small command-line demo of charset conversion with {@link CharsetEncoder} and
 * {@link CharsetDecoder}.
 *
 * <p>Every coder is configured with {@link CodingErrorAction#REPORT}, so malformed input and
 * unmappable characters are reported as errors instead of being silently replaced.
 */
public class CharsetTest {

    /**
     * Prints the canonical name of every charset returned by
     * {@link Charset#availableCharsets()}, one per line.
     *
     * @param out          stream that receives the listing
     * @param aShowAliases when {@code true}, each name that has aliases is followed by
     *                     {@code ": "} and its aliases separated by {@code ", "}
     */
    public static void printAvailableCharsets(PrintStream out, boolean aShowAliases) {
        for (Map.Entry<String, Charset> entry : Charset.availableCharsets().entrySet()) {
            out.print(entry.getKey());
            if (aShowAliases) {
                // The first alias is introduced by ": ", the following ones by ", ".
                String separator = ": ";
                for (String alias : entry.getValue().aliases()) {
                    out.print(separator);
                    out.print(alias);
                    separator = ", ";
                }
            }
            out.println();
        }
    }

    //------------------------------------------------------------------------------------------

    /**
     * Encodes {@code input} into bytes using {@code inputCharset} and then decodes those bytes
     * using {@code outputCharset}.
     *
     * <p>The result is the text obtained when bytes written in one charset are read back in
     * another one. Both steps report an error on any conversion problem.
     *
     * @param input         text to convert
     * @param inputCharset  charset used to encode {@code input} into bytes
     * @param outputCharset charset used to decode those bytes back into text
     * @return the decoded text
     * @throws IOException (a {@link CharacterCodingException}) if {@code input} cannot be
     *                     encoded with {@code inputCharset} or the resulting bytes cannot be
     *                     decoded with {@code outputCharset}
     */
    public static String doConversion(String input, Charset inputCharset, Charset outputCharset)
            throws IOException {
        CharsetEncoder inEncoder = inputCharset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharsetDecoder outDecoder = outputCharset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);

        ByteBuffer encoded = inEncoder.encode(CharBuffer.wrap(input));
        return outDecoder.decode(encoded).toString();
    }

    //------------------------------------------------------------------------------------------

    /**
     * Same as {@link #doConversion(String, Charset, Charset)}, with the charsets given by name.
     *
     * <p>Example:
     * {@code System.err.println( CharsetTest.doConversion("áéíóú", "utf8", "ISO-8859-15") );}
     *
     * @param input             text to convert
     * @param inputCharsetName  name of the charset used to encode {@code input}
     * @param outputCharsetName name of the charset used to decode the encoded bytes
     * @return the decoded text
     * @throws IOException      if the text cannot be encoded or decoded
     * @throws RuntimeException if either charset name is illegal or not supported
     */
    public static String doConversion(String input, String inputCharsetName, String outputCharsetName)
            throws IOException {
        Charset inputCharset = lookupCharset(inputCharsetName);
        Charset outputCharset = lookupCharset(outputCharsetName);
        return doConversion(input, inputCharset, outputCharset);
    }

    //------------------------------------------------------------------------------------------

    /**
     * Resolves a charset by name, wrapping any lookup failure in a {@link RuntimeException}
     * whose message is {@code "<exception simple name>: <exception message>"}.
     */
    private static Charset lookupCharset(String charsetName) {
        try {
            return Charset.forName(charsetName);
        } catch (IllegalArgumentException ex) {
            // Illegal and unsupported charset names are both reported as
            // IllegalArgumentException subclasses.
            String msg = String.format("%s: %s", ex.getClass().getSimpleName(), ex.getLocalizedMessage());
            throw new RuntimeException(msg, ex);
        }
    }

    //------------------------------------------------------------------------------------------

    /**
     * Wraps a coding error in an {@link IOException} whose message also says how far the
     * conversion had progressed.
     *
     * @param ex            the coding error; it becomes the cause of the returned exception
     * @param consumedChars number of characters processed before the error
     * @param consumedBytes number of bytes processed before the error
     * @return a new {@link IOException} describing the progress and the original error
     */
    public static IOException wrapCharacterCodingException(
            CharacterCodingException ex, int consumedChars, int consumedBytes) {
        // The arguments must follow the order of the placeholders: counts first, then the
        // exception name and its message.
        String msg = String.format("After sucessfully reading %s characters (%s bytes): %s: %s",
                consumedChars,
                consumedBytes,
                ex.getClass().getSimpleName(),
                ex.getLocalizedMessage());
        return new IOException(msg, ex);
    }

    //------------------------------------------------------------------------------------------

    /**
     * Reads text from {@code inputStream} decoding it with {@code inputCharset} and writes it
     * to {@code outputStream} encoded with {@code outputCharset}.
     *
     * <p>Any decoding or encoding problem aborts the conversion with an {@link IOException}
     * built by {@link #wrapCharacterCodingException}. The counts in that message are the
     * characters already encoded and the bytes already written to {@code outputStream}.
     *
     * <p>Neither stream is closed; {@code outputStream} is flushed on success.
     *
     * @param inputStream   source of the encoded text
     * @param inputCharset  charset of the bytes in {@code inputStream}
     * @param outputStream  destination of the converted bytes
     * @param outputCharset charset used for the bytes written to {@code outputStream}
     * @throws IOException on I/O failure, malformed input or unmappable characters
     */
    public static void doConversion(InputStream inputStream, Charset inputCharset,
            OutputStream outputStream, Charset outputCharset) throws IOException {
        CharsetDecoder inDecoder = inputCharset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        CharsetEncoder outEncoder = outputCharset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);

        // A one-character buffer keeps the reader from decoding far ahead of what has been
        // converted, so a decoding error surfaces close to the position where it occurs.
        BufferedReader inBR = new BufferedReader(new InputStreamReader(inputStream, inDecoder), 1);

        // Room for one code point: a single char, or a high surrogate waiting for its pair.
        CharBuffer charBuffer = CharBuffer.allocate(2);
        // Large enough for a surrogate pair plus any prefix (byte-order mark, escape sequence)
        // the encoder may emit; overflow is handled below anyway.
        int byteCapacity = Math.max(16, (int) Math.ceil(2 * outEncoder.maxBytesPerChar()));
        ByteBuffer byteBuffer = ByteBuffer.allocate(byteCapacity);

        int consumedChars = 0;
        int consumedBytes = 0;
        boolean endOfInput = false;
        while (!endOfInput) {
            int r;
            try {
                // The CharsetDecoder behind the reader may throw here.
                r = inBR.read();
            } catch (CharacterCodingException ex) {
                throw wrapCharacterCodingException(ex, consumedChars, consumedBytes);
            }
            endOfInput = (r == -1);
            if (!endOfInput) {
                charBuffer.put((char) r);
            }
            charBuffer.flip();

            CoderResult result;
            do {
                int before = charBuffer.position();
                // The final call (endOfInput == true) lets the encoder reject a dangling
                // high surrogate and finish its internal state.
                result = outEncoder.encode(charBuffer, byteBuffer, endOfInput);
                consumedChars += charBuffer.position() - before;
                consumedBytes += drain(byteBuffer, outputStream);
                if (result.isError()) {
                    try {
                        result.throwException();
                    } catch (CharacterCodingException ex) {
                        throw wrapCharacterCodingException(ex, consumedChars, consumedBytes);
                    }
                }
                // On overflow the output buffer has just been emptied: encode again.
            } while (result.isOverflow());

            // Keep an unconsumed high surrogate for the next iteration.
            charBuffer.compact();
        }

        // Stateful encodings may still have closing bytes to emit.
        CoderResult flushResult;
        do {
            flushResult = outEncoder.flush(byteBuffer);
            consumedBytes += drain(byteBuffer, outputStream);
        } while (flushResult.isOverflow());

        outputStream.flush();
    }

    //------------------------------------------------------------------------------------------

    /**
     * Writes the bytes accumulated in {@code byteBuffer} to {@code outputStream}, leaves the
     * buffer empty and ready to be filled again, and returns how many bytes were written.
     */
    private static int drain(ByteBuffer byteBuffer, OutputStream outputStream) throws IOException {
        byteBuffer.flip();
        int count = byteBuffer.remaining();
        if (count > 0) {
            outputStream.write(byteBuffer.array(), byteBuffer.arrayOffset(), count);
        }
        byteBuffer.clear();
        return count;
    }

    //------------------------------------------------------------------------------------------

    /**
     * Converts a file to another charset, writing the result to standard output.
     *
     * <p>Expects exactly three arguments: input file name, input charset name and output
     * charset name. With any other number of arguments the available charsets and a usage
     * message are printed to standard error. Any failure prints its message to standard error
     * and ends the process with exit status 1.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        PrintStream err = System.err;
        InputStream fis = null;
        int exitStatus = 0;
        try {
            if (args.length != 3) {
                printAvailableCharsets(err, true);
                err.println();
                throw new RuntimeException("Usage: intputFileName inputCharsetName outputCharsetName");
            }
            String inputFileName = args[0];
            Charset inputCharset = lookupCharset(args[1]);
            Charset outputCharset = lookupCharset(args[2]);

            try {
                fis = new FileInputStream(inputFileName);
            } catch (FileNotFoundException ex) {
                String msg = String.format("%s: %s", ex.getClass().getSimpleName(), ex.getLocalizedMessage());
                throw new RuntimeException(msg, ex);
            }

            PrintStream outPS = System.out;
            doConversion(fis, inputCharset, outPS, outputCharset);
            // PrintStream never throws on write failures; ask it explicitly so that a
            // truncated output does not pass as success.
            if (outPS.checkError()) {
                throw new IOException("Error writing to standard output");
            }
        } catch (Exception ex) {
            err.println(ex.getMessage());
            // Exit only after the finally block has closed the input file.
            exitStatus = 1;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ex) {
                    // A failure to close is logged but does not change the outcome.
                    Logger.getLogger(CharsetTest.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
        if (exitStatus != 0) {
            System.exit(exitStatus);
        }
    }

    //------------------------------------------------------------------------------------------
}
No hay comentarios:
Publicar un comentario