001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload;
018
019import java.io.UnsupportedEncodingException;
020import java.util.HashMap;
021import java.util.Locale;
022import java.util.Map;
023
024import org.apache.commons.fileupload.util.mime.MimeUtility;
025
026/**
027 * A simple parser intended to parse sequences of name/value pairs.
028 *
029 * Parameter values are expected to be enclosed in quotes if they
030 * contain unsafe characters, such as '=' characters or separators.
031 * Parameter values are optional and can be omitted.
032 *
033 * <p>
034 *  <code>param1 = value; param2 = "anything goes; really"; param3</code>
035 * </p>
036 *
037 * @version $Id: ParameterParser.java 1565253 2014-02-06 13:48:16Z ggregory $
038 */
039public class ParameterParser {
040
041    /**
042     * String to be parsed.
043     */
044    private char[] chars = null;
045
046    /**
047     * Current position in the string.
048     */
049    private int pos = 0;
050
051    /**
052     * Maximum position in the string.
053     */
054    private int len = 0;
055
056    /**
057     * Start of a token.
058     */
059    private int i1 = 0;
060
061    /**
062     * End of a token.
063     */
064    private int i2 = 0;
065
066    /**
067     * Whether names stored in the map should be converted to lower case.
068     */
069    private boolean lowerCaseNames = false;
070
071    /**
072     * Default ParameterParser constructor.
073     */
074    public ParameterParser() {
075        super();
076    }
077
078    /**
079     * Are there any characters left to parse?
080     *
081     * @return <tt>true</tt> if there are unparsed characters,
082     *         <tt>false</tt> otherwise.
083     */
084    private boolean hasChar() {
085        return this.pos < this.len;
086    }
087
088    /**
089     * A helper method to process the parsed token. This method removes
090     * leading and trailing blanks as well as enclosing quotation marks,
091     * when necessary.
092     *
093     * @param quoted <tt>true</tt> if quotation marks are expected,
094     *               <tt>false</tt> otherwise.
095     * @return the token
096     */
097    private String getToken(boolean quoted) {
098        // Trim leading white spaces
099        while ((i1 < i2) && (Character.isWhitespace(chars[i1]))) {
100            i1++;
101        }
102        // Trim trailing white spaces
103        while ((i2 > i1) && (Character.isWhitespace(chars[i2 - 1]))) {
104            i2--;
105        }
106        // Strip away quotation marks if necessary
107        if (quoted
108            && ((i2 - i1) >= 2)
109            && (chars[i1] == '"')
110            && (chars[i2 - 1] == '"')) {
111            i1++;
112            i2--;
113        }
114        String result = null;
115        if (i2 > i1) {
116            result = new String(chars, i1, i2 - i1);
117        }
118        return result;
119    }
120
121    /**
122     * Tests if the given character is present in the array of characters.
123     *
124     * @param ch the character to test for presense in the array of characters
125     * @param charray the array of characters to test against
126     *
127     * @return <tt>true</tt> if the character is present in the array of
128     *   characters, <tt>false</tt> otherwise.
129     */
130    private boolean isOneOf(char ch, final char[] charray) {
131        boolean result = false;
132        for (char element : charray) {
133            if (ch == element) {
134                result = true;
135                break;
136            }
137        }
138        return result;
139    }
140
141    /**
142     * Parses out a token until any of the given terminators
143     * is encountered.
144     *
145     * @param terminators the array of terminating characters. Any of these
146     * characters when encountered signify the end of the token
147     *
148     * @return the token
149     */
150    private String parseToken(final char[] terminators) {
151        char ch;
152        i1 = pos;
153        i2 = pos;
154        while (hasChar()) {
155            ch = chars[pos];
156            if (isOneOf(ch, terminators)) {
157                break;
158            }
159            i2++;
160            pos++;
161        }
162        return getToken(false);
163    }
164
165    /**
166     * Parses out a token until any of the given terminators
167     * is encountered outside the quotation marks.
168     *
169     * @param terminators the array of terminating characters. Any of these
170     * characters when encountered outside the quotation marks signify the end
171     * of the token
172     *
173     * @return the token
174     */
175    private String parseQuotedToken(final char[] terminators) {
176        char ch;
177        i1 = pos;
178        i2 = pos;
179        boolean quoted = false;
180        boolean charEscaped = false;
181        while (hasChar()) {
182            ch = chars[pos];
183            if (!quoted && isOneOf(ch, terminators)) {
184                break;
185            }
186            if (!charEscaped && ch == '"') {
187                quoted = !quoted;
188            }
189            charEscaped = (!charEscaped && ch == '\\');
190            i2++;
191            pos++;
192
193        }
194        return getToken(true);
195    }
196
197    /**
198     * Returns <tt>true</tt> if parameter names are to be converted to lower
199     * case when name/value pairs are parsed.
200     *
201     * @return <tt>true</tt> if parameter names are to be
202     * converted to lower case when name/value pairs are parsed.
203     * Otherwise returns <tt>false</tt>
204     */
205    public boolean isLowerCaseNames() {
206        return this.lowerCaseNames;
207    }
208
209    /**
210     * Sets the flag if parameter names are to be converted to lower case when
211     * name/value pairs are parsed.
212     *
213     * @param b <tt>true</tt> if parameter names are to be
214     * converted to lower case when name/value pairs are parsed.
215     * <tt>false</tt> otherwise.
216     */
217    public void setLowerCaseNames(boolean b) {
218        this.lowerCaseNames = b;
219    }
220
221    /**
222     * Extracts a map of name/value pairs from the given string. Names are
223     * expected to be unique. Multiple separators may be specified and
224     * the earliest found in the input string is used.
225     *
226     * @param str the string that contains a sequence of name/value pairs
227     * @param separators the name/value pairs separators
228     *
229     * @return a map of name/value pairs
230     */
231    public Map<String, String> parse(final String str, char[] separators) {
232        if (separators == null || separators.length == 0) {
233            return new HashMap<String, String>();
234        }
235        char separator = separators[0];
236        if (str != null) {
237            int idx = str.length();
238            for (char separator2 : separators) {
239                int tmp = str.indexOf(separator2);
240                if (tmp != -1 && tmp < idx) {
241                    idx = tmp;
242                    separator = separator2;
243                }
244            }
245        }
246        return parse(str, separator);
247    }
248
249    /**
250     * Extracts a map of name/value pairs from the given string. Names are
251     * expected to be unique.
252     *
253     * @param str the string that contains a sequence of name/value pairs
254     * @param separator the name/value pairs separator
255     *
256     * @return a map of name/value pairs
257     */
258    public Map<String, String> parse(final String str, char separator) {
259        if (str == null) {
260            return new HashMap<String, String>();
261        }
262        return parse(str.toCharArray(), separator);
263    }
264
265    /**
266     * Extracts a map of name/value pairs from the given array of
267     * characters. Names are expected to be unique.
268     *
269     * @param charArray the array of characters that contains a sequence of
270     * name/value pairs
271     * @param separator the name/value pairs separator
272     *
273     * @return a map of name/value pairs
274     */
275    public Map<String, String> parse(final char[] charArray, char separator) {
276        if (charArray == null) {
277            return new HashMap<String, String>();
278        }
279        return parse(charArray, 0, charArray.length, separator);
280    }
281
282    /**
283     * Extracts a map of name/value pairs from the given array of
284     * characters. Names are expected to be unique.
285     *
286     * @param charArray the array of characters that contains a sequence of
287     * name/value pairs
288     * @param offset - the initial offset.
289     * @param length - the length.
290     * @param separator the name/value pairs separator
291     *
292     * @return a map of name/value pairs
293     */
294    public Map<String, String> parse(
295        final char[] charArray,
296        int offset,
297        int length,
298        char separator) {
299
300        if (charArray == null) {
301            return new HashMap<String, String>();
302        }
303        HashMap<String, String> params = new HashMap<String, String>();
304        this.chars = charArray;
305        this.pos = offset;
306        this.len = length;
307
308        String paramName = null;
309        String paramValue = null;
310        while (hasChar()) {
311            paramName = parseToken(new char[] {
312                    '=', separator });
313            paramValue = null;
314            if (hasChar() && (charArray[pos] == '=')) {
315                pos++; // skip '='
316                paramValue = parseQuotedToken(new char[] {
317                        separator });
318
319                if (paramValue != null) {
320                    try {
321                        paramValue = MimeUtility.decodeText(paramValue);
322                    } catch (UnsupportedEncodingException e) {
323                        // let's keep the original value in this case
324                    }
325                }
326            }
327            if (hasChar() && (charArray[pos] == separator)) {
328                pos++; // skip separator
329            }
330            if ((paramName != null) && (paramName.length() > 0)) {
331                if (this.lowerCaseNames) {
332                    paramName = paramName.toLowerCase(Locale.ENGLISH);
333                }
334
335                params.put(paramName, paramValue);
336            }
337        }
338        return params;
339    }
340
341}