Contiki 3.x
aql-lexer.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010, Swedish Institute of Computer Science
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the Institute nor the names of its contributors
14  * may be used to endorse or promote products derived from this software
15  * without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /**
31  * \file
32  * Lexical analyzer for AQL, the Antelope Query Language.
33  * \author
34  * Nicolas Tsiftes <nvt@sics.se>
35  */
36 
37 #include "aql.h"
38 
39 #include <ctype.h>
40 #include <errno.h>
41 #include <limits.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 struct keyword {
47  char *string;
48  token_t token;
49 };
50 
51 /* The keywords are arranged primarily by length and
52  secondarily by expected lookup frequency. */
53 static const struct keyword keywords[] = {
54  {";", END},
55  {"(", LEFT_PAREN},
56  {")", RIGHT_PAREN},
57  {",", COMMA},
58  {"=", EQUAL},
59  {">", GT},
60  {"<", LT},
61  {".", DOT},
62  {"+", ADD},
63  {"-", SUB},
64  {"*", MUL},
65  {"/", DIV},
66  {"#", COMMENT},
67 
68  {">=", GEQ},
69  {"<=", LEQ},
70  {"<>", NOT_EQUAL},
71  {"<-", ASSIGN},
72  {"OR", OR},
73  {"IS", IS},
74  {"ON", ON},
75  {"IN", IN},
76 
77  {"AND", AND},
78  {"NOT", NOT},
79  {"SUM", SUM},
80  {"MAX", MAX},
81  {"MIN", MIN},
82  {"INT", INT},
83 
84  {"INTO", INTO},
85  {"FROM", FROM},
86  {"MEAN", MEAN},
87  {"JOIN", JOIN},
88  {"LONG", LONG},
89  {"TYPE", TYPE},
90 
91  {"WHERE", WHERE},
92  {"COUNT", COUNT},
93  {"INDEX", INDEX},
94 
95  {"INSERT", INSERT},
96  {"SELECT", SELECT},
97  {"REMOVE", REMOVE},
98  {"CREATE", CREATE},
99  {"MEDIAN", MEDIAN},
100  {"DOMAIN", DOMAIN},
101  {"STRING", STRING},
102  {"INLINE", INLINE},
103 
104  {"PROJECT", PROJECT},
105  {"MAXHEAP", MAXHEAP},
106  {"MEMHASH", MEMHASH},
107 
108  {"RELATION", RELATION},
109 
110  {"ATTRIBUTE", ATTRIBUTE}
111 };
112 
/*
 * skip_hint[n - 1] is the index in keywords[] of the first keyword whose
 * spelling is n characters long. The number of entries therefore equals
 * the length of the longest keyword.
 */
static const int8_t skip_hint[] = {
  0,  /* length 1 */
  13, /* length 2 */
  21, /* length 3 */
  27, /* length 4 */
  33, /* length 5 */
  36, /* length 6 */
  44, /* length 7 */
  47, /* length 8 */
  48  /* length 9 */
};
115 
/* Characters that terminate a keyword or identifier. The set is only
   ever read (it is handed to strcspn()), so it is kept const. */
static const char separators[] = "#.;,() \t\n";
117 
118 int
119 lexer_start(lexer_t *lexer, char *input, token_t *token, value_t *value)
120 {
121  lexer->input = input;
122  lexer->prev_pos = input;
123  lexer->token = token;
124  lexer->value = value;
125 
126  return 0;
127 }
128 
129 static token_t
130 get_token_id(const char *string, const size_t length)
131 {
132  int start, end;
133  int i;
134 
135  if(sizeof(skip_hint) < length || length < 1) {
136  return NONE;
137  }
138 
139 
140  start = skip_hint[length - 1];
141  if(sizeof(skip_hint) == length) {
142  end = sizeof(keywords) / sizeof(keywords[0]);
143  } else {
144  end = skip_hint[length];
145  }
146 
147  for(i = start; i < end; i++) {
148  if(strncasecmp(keywords[i].string, string, length) == 0) {
149  return keywords[i].token;
150  }
151  }
152 
153  return NONE;
154 }
155 
156 static int
157 next_real(lexer_t *lexer, const char *s)
158 {
159  char *end;
160  long long_value;
161 #if DB_FEATURE_FLOATS
162  float float_value;
163 #endif /* DB_FEATURE_FLOATS */
164 
165  errno = 0;
166  long_value = strtol(s, &end, 10);
167 
168 #if DB_FEATURE_FLOATS
169  if(*end == '.') {
170  /* Process a float value. */
171  float_value = strtof(s, &end);
172  if(float_value == 0 && s == end) {
173  return -1;
174  }
175  memcpy(lexer->value, &float_value, sizeof(float_value));
176  *lexer->token = FLOAT_VALUE;
177  lexer->input = end;
178 
179  return 1;
180  }
181 #endif /* DB_FEATURE_FLOATS */
182 
183  /* Process an integer value. */
184  if(long_value == 0 && errno != 0) {
185  return -1;
186  }
187  memcpy(lexer->value, &long_value, sizeof(long_value));
188  *lexer->token = INTEGER_VALUE;
189  lexer->input = end;
190 
191  return 1;
192 }
193 
194 static int
195 next_string(lexer_t *lexer, const char *s)
196 {
197  char *end;
198  size_t length;
199 
200  end = strchr(s, '\'');
201  if(end == NULL) {
202  return -1;
203  }
204 
205  length = end - s;
206  *lexer->token = STRING_VALUE;
207  lexer->input = end + 1; /* Skip the closing delimiter. */
208 
209  memcpy(lexer->value, s, length);
210  (*lexer->value)[length] = '\0';
211 
212  return 1;
213 }
214 
215 static int
216 next_token(lexer_t *lexer, const char *s)
217 {
218  size_t length;
219 
220  length = strcspn(s, separators);
221  if(length == 0) {
222  /* We encountered a separator, so we try to get a token of
223  precisely 1 byte. */
224  length = 1;
225  }
226 
227  *lexer->token = get_token_id(s, length);
228  lexer->input = s + length;
229  if(*lexer->token != NONE) {
230  return 1;
231  }
232 
233  /* The input did not constitute a valid token,
234  so we regard it as an identifier. */
235 
236  *lexer->token = IDENTIFIER;
237 
238  memcpy(lexer->value, s, length);
239  (*lexer->value)[length] = '\0';
240 
241  return 1;
242 }
243 
244 int
245 lexer_next(lexer_t *lexer)
246 {
247  const char *s;
248 
249  *lexer->token = NONE;
250  s = lexer->input;
251  s += strspn(s, " \t\n");
252  lexer->prev_pos = s;
253 
254  switch(*s) {
255  case '\'':
256  /* Process the string that follows the delimiter. */
257  return next_string(lexer, s + 1);
258  case '\0':
259  return 0;
260  default:
261  if(isdigit((int)*s) || (*s == '-' && isdigit((int)s[1]))) {
262  return next_real(lexer, s);
263  }
264 
265  /* Process a token. */
266  return next_token(lexer, s);
267  }
268 }
269 
270 void
271 lexer_rewind(lexer_t *lexer)
272 {
273  lexer->input = lexer->prev_pos;
274 }
Definitions and declarations for AQL, the Antelope Query Language.
#define NULL
The null pointer.