001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import org.apache.commons.codec.EncoderException;
021import org.apache.commons.codec.StringEncoder;
022
023/**
024 * Encodes a string into a double metaphone value.
025 * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
026 * <ul>
027 * <li>Original Article: <a 
028 * href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
029 * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
030 * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
031 * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
032 * </ul>
033 * 
034 * @author Apache Software Foundation
035 * @version $Id: DoubleMetaphone.java 800153 2009-08-02 22:45:30Z ggregory $
036 */
037public class DoubleMetaphone implements StringEncoder {
038
039    /**
040     * "Vowels" to test for
041     */
042    private static final String VOWELS = "AEIOUY";
043
044    /**
045     * Prefixes when present which are not pronounced
046     */
047    private static final String[] SILENT_START = 
048    { "GN", "KN", "PN", "WR", "PS" };
049    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = 
050    { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
051    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = 
052    { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
053    private static final String[] L_T_K_S_N_M_B_Z = 
054    { "L", "T", "K", "S", "N", "M", "B", "Z" };
055
056    /**
057     * Maximum length of an encoding, default is 4
058     */
059    protected int maxCodeLen = 4;
060
061    /**
062     * Creates an instance of this DoubleMetaphone encoder
063     */
064    public DoubleMetaphone() {
065        super();
066    }
067    
068    /**
069     * Encode a value with Double Metaphone
070     *
071     * @param value String to encode
072     * @return an encoded string
073     */
074    public String doubleMetaphone(String value) {
075        return doubleMetaphone(value, false);
076    }
077    
078    /**
079     * Encode a value with Double Metaphone, optionally using the alternate
080     * encoding.
081     *
082     * @param value String to encode
083     * @param alternate use alternate encode
084     * @return an encoded string
085     */
086    public String doubleMetaphone(String value, boolean alternate) {
087        value = cleanInput(value);
088        if (value == null) {
089            return null;
090        }
091        
092        boolean slavoGermanic = isSlavoGermanic(value);
093        int index = isSilentStart(value) ? 1 : 0;
094        
095        DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
096        
097        while (!result.isComplete() && index <= value.length() - 1) {
098            switch (value.charAt(index)) {
099            case 'A':
100            case 'E':
101            case 'I':
102            case 'O':
103            case 'U':
104            case 'Y':
105                index = handleAEIOUY(value, result, index);
106                break;
107            case 'B':
108                result.append('P');
109                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
110                break;
111            case '\u00C7':
112                // A C with a Cedilla
113                result.append('S');
114                index++;
115                break; 
116            case 'C':
117                index = handleC(value, result, index);
118                break;
119            case 'D':
120                index = handleD(value, result, index);
121                break;
122            case 'F':
123                result.append('F');
124                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
125                break;
126            case 'G':
127                index = handleG(value, result, index, slavoGermanic);
128                break;
129            case 'H':
130                index = handleH(value, result, index);
131                break;
132            case 'J':
133                index = handleJ(value, result, index, slavoGermanic);
134                break;
135            case 'K':
136                result.append('K');
137                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
138                break;
139            case 'L':
140                index = handleL(value, result, index);
141                break;
142            case 'M':
143                result.append('M');
144                index = conditionM0(value, index) ? index + 2 : index + 1;
145                break;
146            case 'N':
147                result.append('N');
148                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
149                break;
150            case '\u00D1':
151                // N with a tilde (spanish ene)
152                result.append('N');
153                index++;
154                break;
155            case 'P':
156                index = handleP(value, result, index);
157                break;
158            case 'Q':
159                result.append('K');
160                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
161                break;
162            case 'R':
163                index = handleR(value, result, index, slavoGermanic);
164                break;
165            case 'S':
166                index = handleS(value, result, index, slavoGermanic);
167                break;
168            case 'T':
169                index = handleT(value, result, index);
170                break;
171            case 'V':
172                result.append('F');
173                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
174                break;
175            case 'W':
176                index = handleW(value, result, index);
177                break;
178            case 'X':
179                index = handleX(value, result, index);
180                break;
181            case 'Z':
182                index = handleZ(value, result, index, slavoGermanic);
183                break;
184            default:
185                index++;
186                break;
187            }
188        }
189
190        return alternate ? result.getAlternate() : result.getPrimary();
191    }
192    
193    /**
194     * Encode the value using DoubleMetaphone.  It will only work if 
195     * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
196     *
197     * @param obj Object to encode (should be of type String)
198     * @return An encoded Object (will be of type String)
199     * @throws EncoderException encode parameter is not of type String
200     */
201    public Object encode(Object obj) throws EncoderException {
202        if (!(obj instanceof String)) {
203            throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); 
204        } 
205        return doubleMetaphone((String) obj);
206    }
207
208    /**
209     * Encode the value using DoubleMetaphone.
210     *
211     * @param value String to encode
212     * @return An encoded String
213     */
214    public String encode(String value) {
215        return doubleMetaphone(value);   
216    }
217
218    /**
219     * Check if the Double Metaphone values of two <code>String</code> values
220     * are equal.
221     * 
222     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
223     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
224     * @return <code>true</code> if the encoded <code>String</code>s are equal;
225     *          <code>false</code> otherwise.
226     * @see #isDoubleMetaphoneEqual(String,String,boolean)
227     */
228    public boolean isDoubleMetaphoneEqual(String value1, String value2) {
229        return isDoubleMetaphoneEqual(value1, value2, false);
230    }
231    
232    /**
233     * Check if the Double Metaphone values of two <code>String</code> values
234     * are equal, optionally using the alternate value.
235     * 
236     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
237     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
238     * @param alternate use the alternate value if <code>true</code>.
239     * @return <code>true</code> if the encoded <code>String</code>s are equal;
240     *          <code>false</code> otherwise.
241     */
242    public boolean isDoubleMetaphoneEqual(String value1, 
243                                          String value2, 
244                                          boolean alternate) {
245        return doubleMetaphone(value1, alternate).equals(doubleMetaphone
246                                                         (value2, alternate));
247    }
248    
249    /**
250     * Returns the maxCodeLen.
251     * @return int
252     */
253    public int getMaxCodeLen() {
254        return this.maxCodeLen;
255    }
256
257    /**
258     * Sets the maxCodeLen.
259     * @param maxCodeLen The maxCodeLen to set
260     */
261    public void setMaxCodeLen(int maxCodeLen) {
262        this.maxCodeLen = maxCodeLen;
263    }
264
265    //-- BEGIN HANDLERS --//
266
267    /**
268     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
269     */
270    private int handleAEIOUY(String value, DoubleMetaphoneResult result, int 
271                             index) {
272        if (index == 0) {
273            result.append('A');
274        }
275        return index + 1;
276    }
277    
278    /**
279     * Handles 'C' cases
280     */
281    private int handleC(String value, 
282                        DoubleMetaphoneResult result, 
283                        int index) {
284        if (conditionC0(value, index)) {  // very confusing, moved out
285            result.append('K');
286            index += 2;
287        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
288            result.append('S');
289            index += 2;
290        } else if (contains(value, index, 2, "CH")) {
291            index = handleCH(value, result, index);
292        } else if (contains(value, index, 2, "CZ") && 
293                   !contains(value, index - 2, 4, "WICZ")) {
294            //-- "Czerny" --//
295            result.append('S', 'X');
296            index += 2;
297        } else if (contains(value, index + 1, 3, "CIA")) {
298            //-- "focaccia" --//
299            result.append('X');
300            index += 3;
301        } else if (contains(value, index, 2, "CC") && 
302                   !(index == 1 && charAt(value, 0) == 'M')) {
303            //-- double "cc" but not "McClelland" --//
304            return handleCC(value, result, index);
305        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
306            result.append('K');
307            index += 2;
308        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
309            //-- Italian vs. English --//
310            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
311                result.append('S', 'X');
312            } else {
313                result.append('S');
314            }
315            index += 2;
316        } else {
317            result.append('K');
318            if (contains(value, index + 1, 2, " C", " Q", " G")) { 
319                //-- Mac Caffrey, Mac Gregor --//
320                index += 3;
321            } else if (contains(value, index + 1, 1, "C", "K", "Q") && 
322                       !contains(value, index + 1, 2, "CE", "CI")) {
323                index += 2;
324            } else {
325                index++;
326            }
327        }
328        
329        return index;
330    }
331
332    /**
333     * Handles 'CC' cases
334     */
335    private int handleCC(String value, 
336                         DoubleMetaphoneResult result, 
337                         int index) {
338        if (contains(value, index + 2, 1, "I", "E", "H") && 
339            !contains(value, index + 2, 2, "HU")) {
340            //-- "bellocchio" but not "bacchus" --//
341            if ((index == 1 && charAt(value, index - 1) == 'A') || 
342                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
343                //-- "accident", "accede", "succeed" --//
344                result.append("KS");
345            } else {
346                //-- "bacci", "bertucci", other Italian --//
347                result.append('X');
348            }
349            index += 3;
350        } else {    // Pierce's rule
351            result.append('K');
352            index += 2;
353        }
354        
355        return index;
356    }
357    
358    /**
359     * Handles 'CH' cases
360     */
361    private int handleCH(String value, 
362                         DoubleMetaphoneResult result, 
363                         int index) {
364        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
365            result.append('K', 'X');
366            return index + 2;
367        } else if (conditionCH0(value, index)) {
368            //-- Greek roots ("chemistry", "chorus", etc.) --//
369            result.append('K');
370            return index + 2;
371        } else if (conditionCH1(value, index)) {
372            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
373            result.append('K');
374            return index + 2;
375        } else {
376            if (index > 0) {
377                if (contains(value, 0, 2, "MC")) {
378                    result.append('K');
379                } else {
380                    result.append('X', 'K');
381                }
382            } else {
383                result.append('X');
384            }
385            return index + 2;
386        }
387    }
388
389    /**
390     * Handles 'D' cases
391     */
392    private int handleD(String value, 
393                        DoubleMetaphoneResult result, 
394                        int index) {
395        if (contains(value, index, 2, "DG")) {
396            //-- "Edge" --//
397            if (contains(value, index + 2, 1, "I", "E", "Y")) {
398                result.append('J');
399                index += 3;
400                //-- "Edgar" --//
401            } else {
402                result.append("TK");
403                index += 2;
404            }
405        } else if (contains(value, index, 2, "DT", "DD")) {
406            result.append('T');
407            index += 2;
408        } else {
409            result.append('T');
410            index++;
411        }
412        return index;
413    }
414
415    /**
416     * Handles 'G' cases
417     */
418    private int handleG(String value, 
419                        DoubleMetaphoneResult result, 
420                        int index, 
421                        boolean slavoGermanic) {
422        if (charAt(value, index + 1) == 'H') {
423            index = handleGH(value, result, index);
424        } else if (charAt(value, index + 1) == 'N') {
425            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
426                result.append("KN", "N");
427            } else if (!contains(value, index + 2, 2, "EY") && 
428                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
429                result.append("N", "KN");
430            } else {
431                result.append("KN");
432            }
433            index = index + 2;
434        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
435            result.append("KL", "L");
436            index += 2;
437        } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
438            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
439            result.append('K', 'J');
440            index += 2;
441        } else if ((contains(value, index + 1, 2, "ER") || 
442                    charAt(value, index + 1) == 'Y') &&
443                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
444                   !contains(value, index - 1, 1, "E", "I") && 
445                   !contains(value, index - 1, 3, "RGY", "OGY")) {
446            //-- -ger-, -gy- --//
447            result.append('K', 'J');
448            index += 2;
449        } else if (contains(value, index + 1, 1, "E", "I", "Y") || 
450                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
451            //-- Italian "biaggi" --//
452            if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
453                //-- obvious germanic --//
454                result.append('K');
455            } else if (contains(value, index + 1, 3, "IER")) {
456                result.append('J');
457            } else {
458                result.append('J', 'K');
459            }
460            index += 2;
461        } else if (charAt(value, index + 1) == 'G') {
462            index += 2;
463            result.append('K');
464        } else {
465            index++;
466            result.append('K');
467        }
468        return index;
469    }
470    
471    /**
472     * Handles 'GH' cases
473     */
474    private int handleGH(String value, 
475                         DoubleMetaphoneResult result, 
476                         int index) {
477        if (index > 0 && !isVowel(charAt(value, index - 1))) {
478            result.append('K');
479            index += 2;
480        } else if (index == 0) {
481            if (charAt(value, index + 2) == 'I') {
482                result.append('J');
483            } else {
484                result.append('K');
485            }
486            index += 2;
487        } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
488                   (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
489                   (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
490            //-- Parker's rule (with some further refinements) - "hugh"
491            index += 2;
492        } else {
493            if (index > 2 && charAt(value, index - 1) == 'U' && 
494                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
495                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
496                result.append('F');
497            } else if (index > 0 && charAt(value, index - 1) != 'I') {
498                result.append('K');
499            }
500            index += 2;
501        }
502        return index;
503    }
504
505    /**
506     * Handles 'H' cases
507     */
508    private int handleH(String value, 
509                        DoubleMetaphoneResult result, 
510                        int index) {
511        //-- only keep if first & before vowel or between 2 vowels --//
512        if ((index == 0 || isVowel(charAt(value, index - 1))) && 
513            isVowel(charAt(value, index + 1))) {
514            result.append('H');
515            index += 2;
516            //-- also takes car of "HH" --//
517        } else {
518            index++;
519        }
520        return index;
521    }
522    
523    /**
524     * Handles 'J' cases
525     */
526    private int handleJ(String value, DoubleMetaphoneResult result, int index, 
527                        boolean slavoGermanic) {
528        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
529                //-- obvious Spanish, "Jose", "San Jacinto" --//
530                if ((index == 0 && (charAt(value, index + 4) == ' ') || 
531                     value.length() == 4) || contains(value, 0, 4, "SAN ")) {
532                    result.append('H');
533                } else {
534                    result.append('J', 'H');
535                }
536                index++;
537            } else {
538                if (index == 0 && !contains(value, index, 4, "JOSE")) {
539                    result.append('J', 'A');
540                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && 
541                              (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
542                    result.append('J', 'H');
543                } else if (index == value.length() - 1) {
544                    result.append('J', ' ');
545                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
546                    result.append('J');
547                }
548
549                if (charAt(value, index + 1) == 'J') {
550                    index += 2;
551                } else {
552                    index++;
553                }
554            }
555        return index;
556    }
557    
558    /**
559     * Handles 'L' cases
560     */
561    private int handleL(String value, 
562                        DoubleMetaphoneResult result, 
563                        int index) {
564        if (charAt(value, index + 1) == 'L') {
565            if (conditionL0(value, index)) {
566                result.appendPrimary('L');
567            } else {
568                result.append('L');
569            }
570            index += 2;
571        } else {
572            index++;
573            result.append('L');
574        }
575        return index;
576    }
577
578    /**
579     * Handles 'P' cases
580     */
581    private int handleP(String value, 
582                        DoubleMetaphoneResult result, 
583                        int index) {
584        if (charAt(value, index + 1) == 'H') {
585            result.append('F');
586            index += 2;
587        } else {
588            result.append('P');
589            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
590        }
591        return index;
592    }
593
594    /**
595     * Handles 'R' cases
596     */
597    private int handleR(String value, 
598                        DoubleMetaphoneResult result, 
599                        int index, 
600                        boolean slavoGermanic) {
601        if (index == value.length() - 1 && !slavoGermanic && 
602            contains(value, index - 2, 2, "IE") && 
603            !contains(value, index - 4, 2, "ME", "MA")) {
604            result.appendAlternate('R');
605        } else {
606            result.append('R');
607        }
608        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
609    }
610
611    /**
612     * Handles 'S' cases
613     */
614    private int handleS(String value, 
615                        DoubleMetaphoneResult result, 
616                        int index, 
617                        boolean slavoGermanic) {
618        if (contains(value, index - 1, 3, "ISL", "YSL")) {
619            //-- special cases "island", "isle", "carlisle", "carlysle" --//
620            index++;
621        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
622            //-- special case "sugar-" --//
623            result.append('X', 'S');
624            index++;
625        } else if (contains(value, index, 2, "SH")) {
626            if (contains(value, index + 1, 4, 
627                         "HEIM", "HOEK", "HOLM", "HOLZ")) {
628                //-- germanic --//
629                result.append('S');
630            } else {
631                result.append('X');
632            }
633            index += 2;
634        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
635            //-- Italian and Armenian --//
636            if (slavoGermanic) {
637                result.append('S');
638            } else {
639                result.append('S', 'X');
640            }
641            index += 3;
642        } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
643            //-- german & anglicisations, e.g. "smith" match "schmidt" //
644            // "snider" match "schneider" --//
645            //-- also, -sz- in slavic language altho in hungarian it //
646            //   is pronounced "s" --//
647            result.append('S', 'X');
648            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
649        } else if (contains(value, index, 2, "SC")) {
650            index = handleSC(value, result, index);
651        } else {
652            if (index == value.length() - 1 && contains(value, index - 2, 
653                                                        2, "AI", "OI")){
654                //-- french e.g. "resnais", "artois" --//
655                result.appendAlternate('S');
656            } else {
657                result.append('S');
658            }
659            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
660        }
661        return index;
662    }
663
664    /**
665     * Handles 'SC' cases
666     */
667    private int handleSC(String value, 
668                         DoubleMetaphoneResult result, 
669                         int index) {
670        if (charAt(value, index + 2) == 'H') {
671            //-- Schlesinger's rule --//
672            if (contains(value, index + 3, 
673                         2, "OO", "ER", "EN", "UY", "ED", "EM")) {
674                //-- Dutch origin, e.g. "school", "schooner" --//
675                if (contains(value, index + 3, 2, "ER", "EN")) {
676                    //-- "schermerhorn", "schenker" --//
677                    result.append("X", "SK");
678                } else {
679                    result.append("SK");
680                }
681            } else {
682                if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
683                    result.append('X', 'S');
684                } else {
685                    result.append('X');
686                }
687            }
688        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
689            result.append('S');
690        } else {
691            result.append("SK");
692        }
693        return index + 3;
694    }
695
696    /**
697     * Handles 'T' cases
698     */
699    private int handleT(String value, 
700                        DoubleMetaphoneResult result, 
701                        int index) {
702        if (contains(value, index, 4, "TION")) {
703            result.append('X');
704            index += 3;
705        } else if (contains(value, index, 3, "TIA", "TCH")) {
706            result.append('X');
707            index += 3;
708        } else if (contains(value, index, 2, "TH") || contains(value, index, 
709                                                               3, "TTH")) {
710            if (contains(value, index + 2, 2, "OM", "AM") || 
711                //-- special case "thomas", "thames" or germanic --//
712                contains(value, 0, 4, "VAN ", "VON ") || 
713                contains(value, 0, 3, "SCH")) {
714                result.append('T');
715            } else {
716                result.append('0', 'T');
717            }
718            index += 2;
719        } else {
720            result.append('T');
721            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
722        }
723        return index;
724    }
725
726    /**
727     * Handles 'W' cases
728     */
729    private int handleW(String value, 
730                        DoubleMetaphoneResult result, 
731                        int index) {
732        if (contains(value, index, 2, "WR")) {
733            //-- can also be in middle of word --//
734            result.append('R');
735            index += 2;
736        } else {
737            if (index == 0 && (isVowel(charAt(value, index + 1)) || 
738                               contains(value, index, 2, "WH"))) {
739                if (isVowel(charAt(value, index + 1))) {
740                    //-- Wasserman should match Vasserman --//
741                    result.append('A', 'F');
742                } else {
743                    //-- need Uomo to match Womo --//
744                    result.append('A');
745                }
746                index++;
747            } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
748                       contains(value, index - 1, 
749                                5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
750                       contains(value, 0, 3, "SCH")) {
751                //-- Arnow should match Arnoff --//
752                result.appendAlternate('F');
753                index++;
754            } else if (contains(value, index, 4, "WICZ", "WITZ")) {
755                //-- Polish e.g. "filipowicz" --//
756                result.append("TS", "FX");
757                index += 4;
758            } else {
759                index++;
760            }
761        }
762        return index;
763    }
764    
765    /**
766     * Handles 'X' cases
767     */
768    private int handleX(String value, 
769                        DoubleMetaphoneResult result, 
770                        int index) {
771        if (index == 0) {
772            result.append('S');
773            index++;
774        } else {
775            if (!((index == value.length() - 1) && 
776                  (contains(value, index - 3, 3, "IAU", "EAU") || 
777                   contains(value, index - 2, 2, "AU", "OU")))) {
778                //-- French e.g. breaux --//
779                result.append("KS");
780            }
781            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
782        }
783        return index;
784    }
785
786    /**
787     * Handles 'Z' cases
788     */
789    private int handleZ(String value, DoubleMetaphoneResult result, int index, 
790                        boolean slavoGermanic) {
791        if (charAt(value, index + 1) == 'H') {
792            //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
793            result.append('J');
794            index += 2;
795        } else {
796            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
797                result.append("S", "TS");
798            } else {
799                result.append('S');
800            }
801            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
802        }
803        return index;
804    }
805
806    //-- BEGIN CONDITIONS --//
807
808    /**
809     * Complex condition 0 for 'C'
810     */
811    private boolean conditionC0(String value, int index) {
812        if (contains(value, index, 4, "CHIA")) {
813            return true;
814        } else if (index <= 1) {
815            return false;
816        } else if (isVowel(charAt(value, index - 2))) {
817            return false;
818        } else if (!contains(value, index - 1, 3, "ACH")) {
819            return false;
820        } else {
821            char c = charAt(value, index + 2);
822            return (c != 'I' && c != 'E') ||
823                    contains(value, index - 2, 6, "BACHER", "MACHER");
824        }
825    }
826    
827    /**
828     * Complex condition 0 for 'CH'
829     */
830    private boolean conditionCH0(String value, int index) {
831        if (index != 0) {
832            return false;
833        } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && 
834                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
835            return false;
836        } else if (contains(value, 0, 5, "CHORE")) {
837            return false;
838        } else {
839            return true;
840        }
841    }
842    
843    /**
844     * Complex condition 1 for 'CH'
845     */
846    private boolean conditionCH1(String value, int index) {
847        return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 
848                                                                   3, "SCH")) ||
849                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
850                contains(value, index + 2, 1, "T", "S") ||
851                ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
852                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
853    }
854    
855    /**
856     * Complex condition 0 for 'L'
857     */
858    private boolean conditionL0(String value, int index) {
859        if (index == value.length() - 3 && 
860            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
861            return true;
862        } else if ((contains(value, value.length() - 2, 2, "AS", "OS") || 
863                    contains(value, value.length() - 1, 1, "A", "O")) &&
864                   contains(value, index - 1, 4, "ALLE")) {
865            return true;
866        } else {
867            return false;
868        }
869    }
870    
871    /**
872     * Complex condition 0 for 'M'
873     */
874    private boolean conditionM0(String value, int index) {
875        if (charAt(value, index + 1) == 'M') {
876            return true;
877        }
878        return contains(value, index - 1, 3, "UMB") &&
879                ((index + 1) == value.length() - 1 || contains(value,
880                        index + 2, 2, "ER"));
881    }
882    
883    //-- BEGIN HELPER FUNCTIONS --//
884
885    /**
886     * Determines whether or not a value is of slavo-germanic orgin. A value is
887     * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
888     */
889    private boolean isSlavoGermanic(String value) {
890        return value.indexOf('W') > -1 || value.indexOf('K') > -1 || 
891            value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
892    }
893
894    /**
895     * Determines whether or not a character is a vowel or not
896     */
897    private boolean isVowel(char ch) {
898        return VOWELS.indexOf(ch) != -1;
899    }
900
901    /**
902     * Determines whether or not the value starts with a silent letter.  It will
903     * return <code>true</code> if the value starts with any of 'GN', 'KN',
904     * 'PN', 'WR' or 'PS'.
905     */    
906    private boolean isSilentStart(String value) {
907        boolean result = false;
908        for (int i = 0; i < SILENT_START.length; i++) {
909            if (value.startsWith(SILENT_START[i])) {
910                result = true;
911                break;
912            }
913        }
914        return result;
915    }
916
917    /**
918     * Cleans the input
919     */    
920    private String cleanInput(String input) {
921        if (input == null) {
922            return null;
923        }
924        input = input.trim();
925        if (input.length() == 0) {
926            return null;
927        }
928        return input.toUpperCase(java.util.Locale.ENGLISH);
929    }
930
931    /**
932     * Gets the character at index <code>index</code> if available, otherwise
933     * it returns <code>Character.MIN_VALUE</code> so that there is some sort
934     * of a default
935     */    
936    protected char charAt(String value, int index) {
937        if (index < 0 || index >= value.length()) {
938            return Character.MIN_VALUE;
939        } 
940        return value.charAt(index);
941    }
942
943    /**
944     * Shortcut method with 1 criteria
945     */    
946    private static boolean contains(String value, int start, int length, 
947                                    String criteria) {
948        return contains(value, start, length, 
949                        new String[] { criteria });
950    }
951
952    /**
953     * Shortcut method with 2 criteria
954     */    
955    private static boolean contains(String value, int start, int length, 
956                                    String criteria1, String criteria2) {
957        return contains(value, start, length, 
958                        new String[] { criteria1, criteria2 });
959    }
960
961    /**
962     * Shortcut method with 3 criteria
963     */    
964    private static boolean contains(String value, int start, int length, 
965                                    String criteria1, String criteria2, 
966                                    String criteria3) {
967        return contains(value, start, length, 
968                        new String[] { criteria1, criteria2, criteria3 });
969    }
970
971    /**
972     * Shortcut method with 4 criteria
973     */    
974    private static boolean contains(String value, int start, int length, 
975                                    String criteria1, String criteria2, 
976                                    String criteria3, String criteria4) {
977        return contains(value, start, length, 
978                        new String[] { criteria1, criteria2, criteria3, 
979                                       criteria4 });
980    }
981
982    /**
983     * Shortcut method with 5 criteria
984     */    
985    private static boolean contains(String value, int start, int length, 
986                                    String criteria1, String criteria2, 
987                                    String criteria3, String criteria4, 
988                                    String criteria5) {
989        return contains(value, start, length, 
990                        new String[] { criteria1, criteria2, criteria3, 
991                                       criteria4, criteria5 });
992    }
993
994    /**
995     * Shortcut method with 6 criteria
996     */    
997    private static boolean contains(String value, int start, int length, 
998                                    String criteria1, String criteria2, 
999                                    String criteria3, String criteria4, 
1000                                    String criteria5, String criteria6) {
1001        return contains(value, start, length, 
1002                        new String[] { criteria1, criteria2, criteria3, 
1003                                       criteria4, criteria5, criteria6 });
1004    }
1005    
1006    /**
1007     * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
1008     * matching up to length <code>length</code>
1009     */
1010    protected static boolean contains(String value, int start, int length, 
1011                                      String[] criteria) {
1012        boolean result = false;
1013        if (start >= 0 && start + length <= value.length()) {
1014            String target = value.substring(start, start + length);
1015
1016            for (int i = 0; i < criteria.length; i++) {
1017                if (target.equals(criteria[i])) {
1018                    result = true;
1019                    break;
1020                }
1021            }
1022        }
1023        return result;
1024    }
1025    
1026    //-- BEGIN INNER CLASSES --//
1027    
1028    /**
1029     * Inner class for storing results, since there is the optional alternate
1030     * encoding.
1031     */
1032    public class DoubleMetaphoneResult {
1033
1034        private StringBuffer primary = new StringBuffer(getMaxCodeLen());
1035        private StringBuffer alternate = new StringBuffer(getMaxCodeLen());
1036        private int maxLength;
1037
1038        public DoubleMetaphoneResult(int maxLength) {
1039            this.maxLength = maxLength;
1040        }
1041
1042        public void append(char value) {
1043            appendPrimary(value);
1044            appendAlternate(value);
1045        }
1046
1047        public void append(char primary, char alternate) {
1048            appendPrimary(primary);
1049            appendAlternate(alternate);
1050        }
1051
1052        public void appendPrimary(char value) {
1053            if (this.primary.length() < this.maxLength) {
1054                this.primary.append(value);
1055            }
1056        }
1057
1058        public void appendAlternate(char value) {
1059            if (this.alternate.length() < this.maxLength) {
1060                this.alternate.append(value);
1061            }
1062        }
1063
1064        public void append(String value) {
1065            appendPrimary(value);
1066            appendAlternate(value);
1067        }
1068
1069        public void append(String primary, String alternate) {
1070            appendPrimary(primary);
1071            appendAlternate(alternate);
1072        }
1073
1074        public void appendPrimary(String value) {
1075            int addChars = this.maxLength - this.primary.length();
1076            if (value.length() <= addChars) {
1077                this.primary.append(value);
1078            } else {
1079                this.primary.append(value.substring(0, addChars));
1080            }
1081        }
1082
1083        public void appendAlternate(String value) {
1084            int addChars = this.maxLength - this.alternate.length();
1085            if (value.length() <= addChars) {
1086                this.alternate.append(value);
1087            } else {
1088                this.alternate.append(value.substring(0, addChars));
1089            }
1090        }
1091
1092        public String getPrimary() {
1093            return this.primary.toString();
1094        }
1095
1096        public String getAlternate() {
1097            return this.alternate.toString();
1098        }
1099
1100        public boolean isComplete() {
1101            return this.primary.length() >= this.maxLength && 
1102                this.alternate.length() >= this.maxLength;
1103        }
1104    }
1105}