1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
/* * @(#)URLDecoder.java 1.28 05/11/17 * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ package java.net; import java.io.*; /** * Utility class for HTML form decoding. This class contains static methods * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> * MIME format. * <p> * To conversion process is the reverse of that used by the URLEncoder class. It is assumed * that all characters in the encoded string are one of the following: * "<code>a</code>" through "<code>z</code>", * "<code>A</code>" through "<code>Z</code>", * "<code>0</code>" through "<code>9</code>", and * "<code>-</code>", "<code>_</code>", * "<code>.</code>", and "<code>*</code>". The * character "<code>%</code>" is allowed but is interpreted * as the start of a special escaped sequence. * <p> * The following rules are applied in the conversion: * <p> * <ul> * <li>The alphanumeric characters "<code>a</code>" through * "<code>z</code>", "<code>A</code>" through * "<code>Z</code>" and "<code>0</code>" * through "<code>9</code>" remain the same. * <li>The special characters "<code>.</code>", * "<code>-</code>", "<code>*</code>", and * "<code>_</code>" remain the same. * <li>The plus sign "<code>+</code>" is converted into a * space character "<code> </code>" . * <li>A sequence of the form "<code>%<i>xy</i></code>" will be * treated as representing a byte where <i>xy</i> is the two-digit * hexadecimal representation of the 8 bits. Then, all substrings * that contain one or more of these byte sequences consecutively * will be replaced by the character(s) whose encoding would result * in those consecutive bytes. * The encoding scheme used to decode these characters may be specified, * or if unspecified, the default encoding of the platform will be used. * </ul> * <p> * There are two possible ways in which this decoder could deal with * illegal strings. It could either leave illegal characters alone or * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>. * Which approach the decoder takes is left to the * implementation. * * @author Mark Chamness * @author Michael McCloskey * @version 1.28, 11/17/05 * @since 1.2 */ public class URLDecoder { // The platform default encoding static String dfltEncName = URLEncoder.dfltEncName; /** * Decodes a <code>x-www-form-urlencoded</code> string. * The platform's default encoding is used to determine what characters * are represented by any consecutive sequences of the form * "<code>%<i>xy</i></code>". * @param s the <code>String</code> to decode * @deprecated The resulting string may vary depending on the platform's * default encoding. Instead, use the decode(String,String) method * to specify the encoding. * @return the newly decoded <code>String</code> */ @Deprecated public static String decode(String s) { String str = null; try { str = decode(s, dfltEncName); } catch (UnsupportedEncodingException e) { // The system should always have the platform default } return str; } /** * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific * encoding scheme. * The supplied encoding is used to determine * what characters are represented by any consecutive sequences of the * form "<code>%<i>xy</i></code>". * <p> * <em><strong>Note:</strong> The <a href= * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> * World Wide Web Consortium Recommendation</a> states that * UTF-8 should be used. Not doing so may introduce * incompatibilites.</em> * * @param s the <code>String</code> to decode * @param enc The name of a supported * <a href="../lang/package-summary.html#charenc">character * encoding</a>. * @return the newly decoded <code>String</code> * @exception UnsupportedEncodingException * If character encoding needs to be consulted, but * named character encoding is not supported * @see URLEncoder#encode(java.lang.String, java.lang.String) * @since 1.4 */ public static String decode(String s, String enc) throws UnsupportedEncodingException{ boolean needToChange = false; int numChars = s.length(); StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars); int i = 0; if (enc.length() == 0) { throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); } char c; byte[] bytes = null; while (i < numChars) { c = s.charAt(i); switch (c) { case '+': sb.append(' '); i++; needToChange = true; break; case '%': /* * Starting with this instance of %, process all * consecutive substrings of the form %xy. Each * substring %xy will yield a byte. Convert all * consecutive bytes obtained this way to whatever * character(s) they represent in the provided * encoding. */ try { // (numChars-i)/3 is an upper bound for the number // of remaining bytes if (bytes == null) bytes = new byte[(numChars-i)/3]; int pos = 0; while ( ((i+2) < numChars) && (c=='%')) { bytes[pos++] = (byte)Integer.parseInt(s.substring(i+1,i+3),16); i+= 3; if (i < numChars) c = s.charAt(i); } // A trailing, incomplete byte encoding such as // "%x" will cause an exception to be thrown if ((i < numChars) && (c=='%')) throw new IllegalArgumentException( "URLDecoder: Incomplete trailing escape (%) pattern"); sb.append(new String(bytes, 0, pos, enc)); } catch (NumberFormatException e) { throw new IllegalArgumentException( "URLDecoder: Illegal hex characters in escape (%) pattern - " + e.getMessage()); } needToChange = true; break; default: sb.append(c); i++; break; } } return (needToChange? sb.toString() : s); } }