View Javadoc

1   /*
2   Wotonomy: OpenStep design patterns for pure Java applications.
3   Copyright (C) 2000 Blacksmith, Inc.
4   
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9   
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  Lesser General Public License for more details.
14  
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, see http://www.gnu.org
17  */
18  
19  package net.wotonomy.foundation.internal;
20  
21  import java.util.*; //collections
22  import java.io.*;
23  
24  /***
25   * PropertyListParser can parse a property list (plist) file or string, and
26   * return the top-level object represented by the plist. <p>
27   * 
28   * A property list is a hierarchical data structure containing only Maps,
29   * Lists, and Strings -- nothing else.  In other words, a property list is
30   * either a Map, List, or String instance, with the restrictions that the
31   * collections may only contain Map, List, or String instances. <p>
32   * 
33   * This class can read a particularly-formatted string or file, and create
34   * the property list structure described.  It provides a convenient means
35   * for having a structured data file, letting programs simply deal with the
36   * structure rather than having to do a lot of string parsing work as well.
37   * The concept is similar to Properties files, except that the values can
38   * be nested Maps or Lists instead of only Strings. <p>
39   * 
40   * A Map is specified in a file by key/value pairs surrounded by brace
41   * characters.  An equal sign (=) must be between the key and value, and
42   * there must be a semicolon (;) following the value.
43   *
44   * <pre>
45   *     {
46   *         key1 = value1;
47   *         key2 = value2;
48   *         etc...
49   *     }
50   * </pre>
51   *
52   * A List is specified by a comma-separated list of values surrounded by parentheses, like:
53   * <pre>
54   *     ( value1, value2, value3, etc... )
55   * </pre>
56   *
57   * A String can either be quoted in the manner of a constant string in
58   * Java, or unquoted.  If unquoted, the string can only contain
59   * alphanumerics, underscores (_), periods (.), dollar signs ($), colons
60   * (:), or forward slashes (/).  If any other character appears in the
61   * string, it must be quoted (i.e., surrounded by &quot; characters).
62   * Quoted strings may also contain \n, \t, \r, \f, \v, \b, and \a escapes,
63   * octal escapes of the form \000, and unicode escapes of the form of \U
64   * followed by four hexadecimal characters.  Any other character escaped
65   * by a backslash will be treated as that character, and the escaping
66   * backslash character will be omitted.  Thus, to represent an actual
67   * backslash, it must appear as \\ in the quoted string. <p>
68   * 
69   * All whitespace between elements is ignored, and both //-style and
70   * /*-style comments are allowed to appear anywhere between elements. <p>
71   * 
72   * If there are any syntax errors encountered while parsing,
73   * RuntimeExceptions are thrown with the line number and column of the
74   * problem. <p>
75   * 
76   * Currently, HashMaps and ArrayLists are the actual Map and List classes
77   * used when creating the property list. <p>
78   * 
79   * Examples: <p><blockquote>
80   <pre>
81     // This plist file represents a Map, since it starts with a '{'.
82     {
83         Map1 = { subkey1 = "foo"; };
84         Map2 =
85         {
86             "key1"  = "This is a quoted string.";
87             "key 2" = "bar\nbaz";    // the value has a newline in it
88             key3    = ("a", b, c, "quux quux");   // a List of four Strings
89         };  // We need a semicolon here, since it's following the value of the "Map2" key
90  
91         List1 = (foobar,foobaz,"foo,baz", (aa, ab, ac)); // a List of 3 Strings and a List
92  
93         // And now a List of two Maps
94         List2 = (
95             {
96                 key1 = value1;
97                 key2 = "value 2";
98                 key3 = (a,b,c,d);
99                 key4 = ();
100            },  // We need the comma here
101            {
102                key1 = {};  // an empty Map
103                key2 = "another String value";
104            }
105        );
106    }
107  </pre>
108  </blockquote>
109  * For those wondering, this is essentially a re-implementation of
110  * NeXT/Apple's property lists, except that data values are not supported.
111  *
112  * @author clindberg@blacksmith.com
113  * @version $Revision: 899 $
114  */
115 
public class PropertyListParser
{
    /** Punctuation allowed in unquoted strings, in addition to ASCII letters and digits. */
    private static final String UNQUOTED_STRING_CHARS = "._$:/";

    /** Characters skipped as insignificant whitespace between elements. */
    private static final String WHITESPACE_CHARS = " \t\n\r\f";

    /** The complete plist text being parsed. */
    private final char[] buffer;

    /** Index into {@link #buffer} of the next character to examine. */
    private int currIndex;

    /** 1-based number of the line containing {@link #currIndex}; used in error messages. */
    private int lineNumber;

    /** Index into {@link #buffer} of the first character of the current line. */
    private int currLineStartIndex;

    /**
     * Reads an object (String, List, or Map) from plistString and returns it.
     *
     * @param plistString the plist text to parse
     * @return the parsed top-level object, or null if the text contains only
     *         whitespace and comments
     * @throws RuntimeException if there are parse problems; the message
     *         includes the line and column of the problem
     */
    public static Object propertyListFromString(String plistString)
    {
        PropertyListParser parser = new PropertyListParser(plistString);
        return parser.readTopLevelObject();
    }

    /**
     * Reads all remaining characters from the Reader, and returns the
     * result of propertyListFromString().  The reader is not closed.
     *
     * @param reader the source of the plist text
     * @return the parsed top-level object
     * @throws IOException if reading from the reader fails
     * @throws RuntimeException if there are parse problems
     */
    public static Object propertyListFromReader(Reader reader) throws IOException
    {
        char[]       charBuffer = new char[2048];
        StringBuffer stringBuffer = new StringBuffer();
        int          numRead;

        while ((numRead = reader.read(charBuffer)) >= 0)
        {
            if (numRead > 0)
            {
                stringBuffer.append(charBuffer, 0, numRead);
            }
        }

        return propertyListFromString(stringBuffer.toString());
    }

    /**
     * Reads the contents of the specified file, and parses the contents.
     * If any error occurs, prints out a message using System.out.println()
     * and returns null.  The file is decoded with the platform default
     * charset and is always closed before this method returns.
     *
     * @param filename path of the file to read
     * @return the parsed top-level object, or null on any error
     */
    public static Object propertyListFromFile(String filename)
    {
        FileInputStream stream = null;
        try
        {
            stream = new FileInputStream(filename);
            return propertyListFromReader(new InputStreamReader(stream));
        }
        catch (Exception exception)
        {
            String errorMessage = exception.getMessage();
            System.out.println("Error parsing property list from "+filename+": "+errorMessage);
        }
        finally
        {
            if (stream != null)
            {
                try
                {
                    stream.close();
                }
                catch (IOException ignored)
                {
                    // Nothing useful can be done if close fails; the parse
                    // result (or error) has already been determined.
                }
            }
        }

        return null;
    }

    /**
     * Creates a new PropertyListParser to parse the contents of the
     * specified String.
     *
     * @param plistString the plist text to parse
     */
    public PropertyListParser(String plistString)
    {
        this(plistString.toCharArray());
    }

    /**
     * Creates a new PropertyListParser to parse the specified char array.
     * The array is used directly, not copied.
     *
     * @param charArray the plist text to parse
     */
    public PropertyListParser(char[] charArray)
    {
        buffer = charArray;
        lineNumber = 1;
        // The first line starts at index 0, so that columns on line 1 are
        // 1-based just like the columns reported for every later line.
        currLineStartIndex = 0;
        currIndex = 0;
    }

    /**
     * Parses one object from the current position and verifies that only
     * whitespace and comments follow it.
     *
     * @return the parsed object, or null if the input held no object
     * @throws RuntimeException on a syntax error or trailing content
     */
    public Object readTopLevelObject()
    {
        Object plist = readObject();

        skipCommentWhitespace();
        if (!isAtEnd())
        {
            throwParseException("Extra characters in plist string after parsing object.  A plist should only contain one top-level object.");
        }

        return plist;
    }

    /** Always throws a RuntimeException carrying errorMessage plus the current line and 1-based column. */
    private void throwParseException(String errorMessage)
    {
        int column = currIndex - currLineStartIndex + 1;
        throw new RuntimeException(errorMessage + " (Line " + lineNumber + ", column " + column + ")");
    }

    /** Records that a newline was consumed and that the next line begins at lineStartIndex. */
    private void updateLineNumberWithIndex(int lineStartIndex)
    {
        lineNumber++;
        currLineStartIndex = lineStartIndex;
    }

    /** Returns true once every character of the buffer has been consumed. */
    private boolean isAtEnd()
    {
        return currIndex >= buffer.length;
    }

    /** Advances to (but not past) the newline ending a double-slash comment, or to end of input. */
    private void skipDoubleslashComment()
    {
        while (!isAtEnd() && buffer[currIndex] != '\n')
        {
            currIndex++;
        }
    }

    /**
     * Skips a C-style block comment.  On entry currIndex is on the slash of
     * the two-character opener (the caller has verified both characters).
     * Throws if the input ends before the closing delimiter.
     */
    private void skipStandardCComment()
    {
        // Skip the whole opener first, so that its star can never be paired
        // with an immediately following slash and mistaken for the closer.
        currIndex += 2;

        while (!isAtEnd())
        {
            char c = buffer[currIndex];

            if (c == '*' && (currIndex + 1) < buffer.length && buffer[currIndex+1] == '/')
            {
                currIndex += 2;  // consume the closer
                return;
            }
            if (c == '\n')
            {
                updateLineNumberWithIndex(currIndex+1);
            }
            currIndex++;
        }

        throwParseException("Input exhausted while parsing comment");
    }

    /** Skips whitespace characters, keeping the line bookkeeping up to date. */
    private void skipWhitespace()
    {
        while (!isAtEnd() && isWhitespace(buffer[currIndex]))
        {
            if (buffer[currIndex] == '\n')
            {
                updateLineNumberWithIndex(currIndex+1);
            }
            currIndex++;
        }
    }

    /** Skips any mix of whitespace, double-slash comments and block comments. */
    private void skipCommentWhitespace()
    {
        boolean done = false;

        while (!done)
        {
            done = true;

            skipWhitespace();
            // A comment opener needs two characters, so at least two must remain.
            if ((buffer.length - currIndex) > 1 && buffer[currIndex] == '/')
            {
                char next = buffer[currIndex+1];
                if (next == '/')
                {
                    done = false; // iterate again: more whitespace/comments may follow
                    skipDoubleslashComment();
                }
                else if (next == '*')
                {
                    done = false; // iterate again
                    skipStandardCComment();
                }
            }
        }
    }

    /**
     * Reads the next object, dispatching on its first character.
     * Returns null if only whitespace and comments remain.
     */
    private Object readObject()
    {
        skipCommentWhitespace();
        if (isAtEnd())
        {
            return null;
        }

        // Data (i.e. byte[]) not supported
        switch (buffer[currIndex])
        {
            case '"': return readQuotedString();
            case '(': return readList();
            case '{': return readMap();
            default:  return readUnquotedString();
        }
    }

    /** Returns the numeric value (0-15) of a hex digit, or 0 if c is not a hex digit. */
    private static final byte valueForHexDigit(char c)
    {
        if (c >= '0' && c <= '9') return (byte)(c - '0');
        if (c >= 'a' && c <= 'f') return (byte)((c - 'a') + 10);
        if (c >= 'A' && c <= 'F') return (byte)((c - 'A') + 10);

        return 0;
    }

    private static final boolean isOctalDigit(char c)
    {
        return c >= '0' && c <= '7';
    }

    private static final boolean isHexDigit(char c)
    {
        return (c >= '0' && c <= '9') ||
               (c >= 'a' && c <= 'f') ||
               (c >= 'A' && c <= 'F');
    }

    private static final boolean isWhitespace(char c)
    {
        return WHITESPACE_CHARS.indexOf(c) >= 0;
    }

    private static final boolean isValidUnquotedStringChar(char c)
    {
        return ((c >= 'a' && c <= 'z') ||
                (c >= 'A' && c <= 'Z') ||
                (c >= '0' && c <= '9') ||
                UNQUOTED_STRING_CHARS.indexOf(c) >= 0);
    }

    /** Reads a maximal run of unquoted-string characters; throws if the run is empty. */
    private String readUnquotedString()
    {
        int startIndex = currIndex;

        while (!isAtEnd() && isValidUnquotedStringChar(buffer[currIndex]))
        {
            currIndex++;
        }

        if (startIndex == currIndex)
        {
            throwParseException("No allowable characters found to parse unquoted string");
        }

        return new String(buffer, startIndex, currIndex - startIndex);
    }

    /**
     * Reads a double-quoted string, decoding backslash escapes.  On entry
     * currIndex is on the opening quote; on return it is just past the
     * closing quote.
     */
    private String readQuotedString()
    {
        currIndex++;  // skip over the opening '"'

        StringBuffer stringBuffer = new StringBuffer();
        // Start of the pending run of literal characters.  Literal text is
        // appended in chunks, either before an escape sequence or at the end.
        int startIndex = currIndex;

        while (!isAtEnd() && buffer[currIndex] != '"')
        {
            char c = buffer[currIndex];

            if (c != '\\')
            {
                if (c == '\n')
                {
                    updateLineNumberWithIndex(currIndex+1);
                }
                currIndex++;
                continue;
            }

            /* Append anything scanned past before the backslash */
            if (startIndex < currIndex)
            {
                stringBuffer.append(buffer, startIndex, currIndex - startIndex);
            }
            currIndex++; // skip over the backslash

            if (isAtEnd())
            {
                throwParseException("Input exhausted while parsing escape sequence");
            }

            appendEscapedChar(stringBuffer);

            /* Reset startIndex, so a verbatim copy will now start from this index */
            startIndex = currIndex;
        }

        if (isAtEnd())
        {
            throwParseException("Input exhausted while parsing quoted string");
        }
        if (startIndex < currIndex)
        {
            stringBuffer.append(buffer, startIndex, currIndex - startIndex);
        }
        currIndex++; // skip past the closing '"'

        return stringBuffer.toString();
    }

    /**
     * Decodes one escape sequence and appends the resulting character to out.
     * On entry currIndex is on the character following the backslash (the
     * caller has checked that one exists); on return it is just past the
     * sequence.
     */
    private void appendEscapedChar(StringBuffer out)
    {
        switch (buffer[currIndex])
        {
            case 't': out.append('\t'); currIndex++; break;   // tab
            case 'n': out.append('\n'); currIndex++; break;   // newline
            case 'r': out.append('\r'); currIndex++; break;   // carriage return
            case 'f': out.append('\f'); currIndex++; break;   // form feed
            case 'b': out.append('\b'); currIndex++; break;   // backspace
            case 'a': out.append('\007'); currIndex++; break; // bell
            case 'v': out.append('\013'); currIndex++; break; // vertical tab
            case 'U':
            case 'u':
            {
                /* A Unicode escape.  Always followed by 4 hex digits. */
                currIndex++; // skip past the 'U'
                if ((currIndex+4) > buffer.length)
                {
                    throwParseException("Not enough chars to parse \\U sequence");
                }

                int value = 0;
                for (int i = 0; i < 4; i++)
                {
                    char digit = buffer[currIndex+i];
                    if (!isHexDigit(digit))
                    {
                        throwParseException("Four hex digits not found for \\U sequence");
                    }
                    value = (value << 4) | valueForHexDigit(digit);
                }
                out.append((char)value);
                currIndex += 4;
                break;
            }
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            {
                /* An octal escape.  Expect 1, 2, or 3 octal digits. */
                int digits = 0;
                int value = 0;

                do
                {
                    value = (value * 8) + (buffer[currIndex] - '0');
                    currIndex++;
                    digits++;
                }
                // Stop after the third digit; any further digit is literal text.
                while (digits < 3 && !isAtEnd() && isOctalDigit(buffer[currIndex]));

                if (value > 255)
                {
                    throwParseException("Value too large in octal escape sequence (> 0377)");
                }

                // This assumes value is in ISO Latin 1 encoding
                out.append((char)value);
                break;
            }
            /* I guess plists can't have the \x{HEX}{HEX} escapes */
            default:
            {
                // Unknown escape sequence: the backslash is dropped and the
                // character itself is kept (this covers '"' and '\\').
                out.append(buffer[currIndex]);
                if (buffer[currIndex] == '\n')
                {
                    updateLineNumberWithIndex(currIndex+1);
                }
                currIndex++;
                break;
            }
        }
    }

    /**
     * Reads a parenthesized, comma-separated list.  On entry currIndex is on
     * the opening parenthesis; on return it is just past the closing one.
     * A comma directly before the closing parenthesis is tolerated.
     */
    private List readList()
    {
        List newList = new ArrayList();

        currIndex++;  // skip over '('
        skipCommentWhitespace();
        while (!isAtEnd() && buffer[currIndex] != ')')
        {
            /* A comma is required between list elements */
            if (newList.size() > 0)
            {
                if (buffer[currIndex] != ',')
                {
                    throwParseException("List parsing failed: expecting ','");
                }
                currIndex++;
                skipCommentWhitespace();
                if (isAtEnd())
                {
                    throwParseException("Input exhausted while parsing list");
                }
            }

            if (buffer[currIndex] != ')')
            {
                Object plistObject = readObject();
                if (plistObject == null)
                {
                    throwParseException("List parsing failed: could not read contained object.");
                }
                newList.add(plistObject);
                skipCommentWhitespace();
            }
        }

        if (isAtEnd())
        {
            throwParseException("Input exhausted while parsing list");
        }
        currIndex++; // skip past ')'

        return newList;
    }

    /**
     * Reads a brace-delimited map of "key = value;" entries.  Keys must be
     * Strings.  On entry currIndex is on the opening brace; on return it is
     * just past the closing one.
     */
    private Map readMap()
    {
        Map newMap = new HashMap();

        currIndex++; // skip over open brace
        skipCommentWhitespace();

        while (!isAtEnd() && buffer[currIndex] != '}')
        {
            Object key = readObject();
            if (!(key instanceof String))  // also rejects null
            {
                throwParseException("Map parsing failed: could not parse key or key is not a String");
            }

            skipCommentWhitespace();
            if (isAtEnd() || buffer[currIndex] != '=')
            {
                throwParseException("Map parsing failed: expecting '='");
            }
            currIndex++;  // skip over '='
            skipCommentWhitespace();
            if (isAtEnd())
            {
                throwParseException("Input exhausted while parsing map");
            }

            Object value = readObject();
            if (value == null)
            {
                throwParseException("Map parsing failed: could not parse value object");
            }

            skipCommentWhitespace();
            if (isAtEnd() || buffer[currIndex] != ';')
            {
                throwParseException("Map parsing failed: expecting ';'");
            }
            currIndex++;  // skip over ';'
            skipCommentWhitespace();

            newMap.put(key, value);
        }

        if (isAtEnd())
        {
            throwParseException("Input exhausted while parsing map");
        }
        currIndex++; // skip past '}'

        return newMap;
    }

    /**
     * Command-line entry point: parses the plist file named by the first
     * argument and prints the resulting object.
     */
    public static void main(String[] args)
    {
        if (args.length < 1)
        {
            System.err.println("Usage: PropertyListParser <plist-file>");
            return;
        }

        Object plist = PropertyListParser.propertyListFromFile(args[0]);
        System.out.println(plist);
    }
}
546