Contiki 3.x
htmlparser.c
1 /*
2  * Copyright (c) 2002, Adam Dunkels.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following
12  * disclaimer in the documentation and/or other materials provided
13  * with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  * products derived from this software without specific prior
16  * written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * This file is part of the Contiki desktop environment
31  *
32  *
33  */
34 
35 /* htmlparser.c:
36  *
37  * Implements a very simplistic HTML parser. It recognizes HTML links
38  * (<a href>-tags), HTML img alt tags, a few text flow break tags
39  * (<br>, <p>, <h>), the <li> tag (but does not even try to
40  * distinguish between <ol> or <ul>) as well as HTML comment tags
41  * (<!-- -->).
42  *
43  * To save memory, the HTML parser is state machine driven, which
44  * means that it will shave off one character from the HTML page,
45  * process that character, and return to the next. Another way of
46  * doing it would be to buffer a number of characters and process them
47  * together.
48  *
49  * The main function in this file is the htmlparser_parse() function
50  * which takes a htmlparser_state structure and a part of an HTML file
51  * as an argument. The htmlparser_parse() function will call the
52  * helper functions parse_char() and parse_tag(). Those functions will
53  * in turn call the two callback functions htmlparser_char() and
54  * htmlparser_tag(). Those functions must be implemented by the using
55  * module (e.g., a web browser program).
56  *
57  * htmlparser_char() will be called for every non-tag character.
58  *
59  * htmlparser_tag() will be called whenever a full tag has been found.
60  *
61  */
62 
63 #include <string.h>
64 
65 #include "contiki.h"
66 #include "html-strings.h"
67 #include "www.h"
68 
69 #include "htmlparser.h"
70 
/* Debug tracing switch. With "#if 1" PRINTF(x) expands to nothing, so
   all trace calls in this file compile away; change it to "#if 0" to
   route them to printf(). Callers wrap the whole argument list in an
   extra pair of parentheses, e.g. PRINTF(("x=%d\n", x)), so that it can
   be pasted directly after "printf". */
71 #if 1
72 #define PRINTF(x)
73 #else
74 #include <stdio.h>
75 #define PRINTF(x) printf x
76 #endif
77 
78 
79 /*-----------------------------------------------------------------------------------*/
/* Character codes of the upper-case letters referenced below (these
   values are the ASCII / ISO 8859-1 codes of the letters). */
80 #define ISO_A 0x41
81 #define ISO_B 0x42
82 #define ISO_E 0x45
83 #define ISO_F 0x46
84 #define ISO_G 0x47
85 #define ISO_H 0x48
86 #define ISO_I 0x49
87 #define ISO_L 0x4c
88 #define ISO_M 0x4d
89 #define ISO_P 0x50
90 #define ISO_R 0x52
91 #define ISO_T 0x54
92 
/* Lower-case counterparts, derived by setting bit 0x20 of the
   corresponding upper-case code. */
93 #define ISO_a (ISO_A | 0x20)
94 #define ISO_b (ISO_B | 0x20)
95 #define ISO_e (ISO_E | 0x20)
96 #define ISO_f (ISO_F | 0x20)
97 #define ISO_g (ISO_G | 0x20)
98 #define ISO_h (ISO_H | 0x20)
99 #define ISO_i (ISO_I | 0x20)
100 #define ISO_l (ISO_L | 0x20)
101 #define ISO_m (ISO_M | 0x20)
102 #define ISO_p (ISO_P | 0x20)
103 #define ISO_r (ISO_R | 0x20)
104 #define ISO_t (ISO_T | 0x20)
105 
/* Whitespace and punctuation character codes. ISO_citation is the
   double quote (0x22) and ISO_citation2 the single quote (0x27); both
   are accepted as attribute value delimiters by parse_word(). */
106 #define ISO_ht 0x09
107 #define ISO_nl 0x0a
108 #define ISO_cr 0x0d
109 #define ISO_space 0x20
110 #define ISO_bang 0x21
111 #define ISO_citation 0x22
112 #define ISO_ampersand 0x26
113 #define ISO_citation2 0x27
114 #define ISO_asterisk 0x2a
115 #define ISO_dash 0x2d
116 #define ISO_slash 0x2f
117 #define ISO_semicolon 0x3b
118 #define ISO_lt 0x3c
119 #define ISO_eq 0x3d
120 #define ISO_gt 0x3e
121 
/* NOTE: the names are swapped relative to the glyphs: ISO_rbrack (0x5b)
   is '[' and ISO_lbrack (0x5d) is ']'. parse_tag() emits ISO_rbrack
   before the frame link and ISO_lbrack after it, so the rendered text
   still reads "[" ... "]". */
122 #define ISO_rbrack 0x5b
123 #define ISO_lbrack 0x5d
124 
/* Values of htmlparser_state.minorstate: the lexical position inside
   the HTML stream. parse_word() switches on this value to decide how
   the next characters are interpreted. */
125 #define MINORSTATE_NONE 0
126 #define MINORSTATE_TEXT 1 /* Parse normal text */
127 #define MINORSTATE_EXTCHAR 2 /* Check for semi-colon */
128 #define MINORSTATE_TAG 3 /* Check for name of tag. */
129 #define MINORSTATE_TAGEND 4 /* Scan for end of tag. */
130 #define MINORSTATE_TAGATTR 5 /* Parse tag attr. */
131 #define MINORSTATE_TAGATTRSPACE 6 /* Parse optional space after tag
132  attr. */
133 #define MINORSTATE_TAGATTRPARAM 7 /* Parse tag attr parameter. */
134 #define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
135  quotation marks. */
136 #define MINORSTATE_HTMLCOMMENT 9 /* Scan for HTML comment end */
137 
/* Values of htmlparser_state.majorstate: what is done with completed
   words. do_word() hands words to htmlparser_word() except in
   MAJORSTATE_LINK (words are accumulated as link text) and
   MAJORSTATE_DISCARD (words are dropped). */
138 #define MAJORSTATE_NONE 0
139 #define MAJORSTATE_BODY 1
140 #define MAJORSTATE_LINK 2
141 #define MAJORSTATE_FORM 3
142 #define MAJORSTATE_DISCARD 4
143 
144 
/* Complete state of the parser. A single static instance (s, below) is
   used, so the parser keeps its position across successive calls to
   htmlparser_parse(). */
145 struct htmlparser_state {
146
  /* Current lexical state, one of the MINORSTATE_* values. */
147  unsigned char minorstate;
  /* Lower-cased name of the tag being parsed and its write index. */
148  char tag[20];
149  unsigned char tagptr;
  /* Lower-cased name of the attribute being parsed and its write index. */
150  char tagattr[20];
151  unsigned char tagattrptr;
  /* Value of the attribute being parsed and its write index. */
152  char tagattrparam[WWW_CONF_MAX_URLLEN + 1];
153  unsigned char tagattrparamptr;
  /* quotechar is the quote character that opened the current attribute
     value; parse_word() ends the value when it sees it again. lastchar
     is cleared by htmlparser_init() and not otherwise referenced in the
     code shown here. */
154  unsigned char lastchar, quotechar;
  /* Current and previous MAJORSTATE_* value; see switch_majorstate(). */
155  unsigned char majorstate, lastmajorstate;
  /* href of the link whose text is currently being collected. */
156  char linkurl[WWW_CONF_MAX_URLLEN + 1];
157
  /* Word (or, in MAJORSTATE_LINK, link text) accumulated so far and its
     length; filled by add_char() and flushed by do_word(). */
158  char word[WWW_CONF_WEBPAGE_WIDTH];
159  unsigned char wordlen;
160
161 #if WWW_CONF_FORMS
  /* Action of the enclosing <form>, and the type, name, value and size
     collected from the attributes of the current <input> tag. */
162  char formaction[WWW_CONF_MAX_FORMACTIONLEN + 1];
163  unsigned char inputtype;
164  char inputname[WWW_CONF_MAX_INPUTNAMELEN + 1];
165  char inputvalue[WWW_CONF_MAX_INPUTVALUELEN + 1];
166  unsigned char inputvaluesize;
167 #endif /* WWW_CONF_FORMS */
168 };
169
/* The one and only parser instance. */
170 static struct htmlparser_state s;
171 
172 /*-----------------------------------------------------------------------------------*/
/* Sentinel entry that terminates the tag table: a single 0xff byte. */
173 static char last[1] = {(char)0xff};
174
/* Table of the tags the parser reacts to. Each TAG_* macro is the index
   of the entry that directly follows it. find_tag() narrows a
   [first, last) index range over this table one character at a time
   using ordered comparisons, so the entries must stay sorted in
   ascending character order; a new tag has to be inserted at its sorted
   position and the following TAG_* indices renumbered. */
175 static const char *tags[] = {
176 #define TAG_FIRST 0
177 #define TAG_SLASHA 0
178  html_slasha,
179 #define TAG_SLASHDIV 1
180  html_slashdiv,
181 #define TAG_SLASHFORM 2
182  html_slashform,
183 #define TAG_SLASHH 3
184  html_slashh,
185 #define TAG_SLASHSCRIPT 4
186  html_slashscript,
187 #define TAG_SLASHSELECT 5
188  html_slashselect,
189 #define TAG_SLASHSTYLE 6
190  html_slashstyle,
191 #define TAG_A 7
192  html_a,
193 #define TAG_BODY 8
194  html_body,
195 #define TAG_BR 9
196  html_br,
197 #define TAG_FORM 10
198  html_form,
199 #define TAG_FRAME 11
200  html_frame,
201 #define TAG_H1 12
202  html_h1,
203 #define TAG_H2 13
204  html_h2,
205 #define TAG_H3 14
206  html_h3,
207 #define TAG_H4 15
208  html_h4,
209 #define TAG_IMG 16
210  html_img,
211 #define TAG_INPUT 17
212  html_input,
213 #define TAG_LI 18
214  html_li,
215 #define TAG_P 19
216  html_p,
217 #define TAG_SCRIPT 20
218  html_script,
219 #define TAG_SELECT 21
220  html_select,
221 #define TAG_STYLE 22
222  html_style,
223 #define TAG_TR 23
224  html_tr,
225 #define TAG_LAST 24
226  last,
227 };
228 
229 /*-----------------------------------------------------------------------------------*/
230 static unsigned char CC_FASTCALL
231 iswhitespace(char c)
232 {
233  return (c == ISO_space ||
234  c == ISO_nl ||
235  c == ISO_cr ||
236  c == ISO_ht);
237 }
238 /*-----------------------------------------------------------------------------------*/
#if WWW_CONF_FORMS
/*
 * Reset the state collected for an <input> tag so that the next one
 * starts from a clean slate: no type, empty name and value, and the
 * default field size.
 */
static void
init_input(void)
{
  s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
  s.inputvaluesize = 20; /* De facto default size */

  /* Empty the name and value strings. */
  s.inputname[0] = 0;
  s.inputvalue[0] = 0;

  /* Keep a terminator in the last slot of every form buffer; the
     strncpy() calls in parse_tag() never write that slot. */
  s.formaction[WWW_CONF_MAX_FORMACTIONLEN] = 0;
  s.inputname[WWW_CONF_MAX_INPUTNAMELEN] = 0;
  s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN] = 0;
}
#endif /* WWW_CONF_FORMS */
251 /*-----------------------------------------------------------------------------------*/
252 void
253 htmlparser_init(void)
254 {
255  s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;
256  s.minorstate = MINORSTATE_TEXT;
257  s.lastchar = 0;
258 #if WWW_CONF_FORMS
259  s.formaction[0] = 0;
260 #endif /* WWW_CONF_FORMS */
261 }
262 /*-----------------------------------------------------------------------------------*/
263 static char CC_FASTCALL
264 lowercase(char c)
265 {
266  /* XXX: This is a *brute force* approach to lower-case
267  converting and should *not* be used anywhere else! It
268  works for our purposes, however (i.e., HTML tags). */
269  if(c > 0x40) {
270  return (c & 0x1f) | 0x60;
271  } else {
272  return c;
273  }
274 }
275 /*-----------------------------------------------------------------------------------*/
276 static void
277 endtagfound(void)
278 {
279  s.tag[s.tagptr] = 0;
280  s.tagattr[s.tagattrptr] = 0;
281  s.tagattrparam[s.tagattrparamptr] = 0;
282 }
283 /*-----------------------------------------------------------------------------------*/
284 static void CC_FASTCALL
285 switch_majorstate(unsigned char newstate)
286 {
287  if(s.majorstate != newstate) {
288  PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
289  s.lastmajorstate = s.majorstate;
290  s.majorstate = newstate;
291  }
292 }
293 /*-----------------------------------------------------------------------------------*/
294 static void CC_FASTCALL
295 add_char(unsigned char c)
296 {
297  if(s.wordlen < WWW_CONF_WEBPAGE_WIDTH - 1 && c < 0x80) {
298  s.word[s.wordlen] = c;
299  ++s.wordlen;
300  }
301 }
302 /*-----------------------------------------------------------------------------------*/
303 static void
304 do_word(void)
305 {
306  if(s.wordlen > 0) {
307  if(s.majorstate == MAJORSTATE_LINK) {
308  if(s.word[s.wordlen] != ISO_space) {
309  add_char(ISO_space);
310  }
311  } else if(s.majorstate == MAJORSTATE_DISCARD) {
312  s.wordlen = 0;
313  } else {
314  s.word[s.wordlen] = '\0';
315  htmlparser_word(s.word, s.wordlen);
316  s.wordlen = 0;
317  }
318  }
319 }
320 /*-----------------------------------------------------------------------------------*/
/*
 * Break the line: first let do_word() deal with any pending word, then
 * report the line break through the htmlparser_newline() callback.
 */
static void
newline(void)
{
  do_word();
  htmlparser_newline();
}
327 /*-----------------------------------------------------------------------------------*/
328 static unsigned char CC_FASTCALL
329 find_tag(char *tag)
330 {
331  static unsigned char first, last, i, tabi;
332  static char tagc;
333 
334  first = TAG_FIRST;
335  last = TAG_LAST;
336  i = 0;
337 
338  do {
339  tagc = tag[i];
340 
341  if((tagc == 0 || tagc == ISO_slash) && tags[first][i] == 0) {
342  return first;
343  }
344 
345  tabi = first;
346 
347  /* First, find first matching tag from table. */
348  while(tagc > (tags[tabi])[i] && tabi < last) {
349  ++tabi;
350  }
351  first = tabi;
352 
353  /* Second, find last matching tag from table. */
354  while(tagc == (tags[tabi])[i] && tabi < last) {
355  ++tabi;
356  }
357  last = tabi;
358 
359  /* If first and last matching tags are equal, we have a non-match
360  and return. Else we continue with the next character. */
361  ++i;
362 
363  } while(last != first);
364  return TAG_LAST;
365 }
366 /*-----------------------------------------------------------------------------------*/
/*
 * Act on the tag currently described by s.tag, s.tagattr and
 * s.tagattrparam (all NUL-terminated by endtagfound() beforehand).
 *
 * parse_word() calls this once for every attribute it completes and
 * once more, with an empty s.tagattr, when the closing '>' of the tag
 * is reached. Tags that are not in the tags[] table are ignored.
 */
367 static void
368 parse_tag(void)
369 {
370  static char *tagattrparam;
371  static unsigned char size;
372 
  /* NOTE(review): dummy is only ever written (below), never read; its
     purpose cannot be determined from this file. */
373  static char dummy;
374 
375  PRINTF(("Parsing tag '%s' '%s' '%s'\n", s.tag, s.tagattr, s.tagattrparam));
376 
377  switch(find_tag(s.tag)) {
  /* Paragraphs and headings call newline() twice: once here and once
     more after falling through to the single-break tags. */
378  case TAG_P:
379  case TAG_H1:
380  case TAG_H2:
381  case TAG_H3:
382  case TAG_H4:
383  newline();
384  /* FALLTHROUGH */
385  case TAG_BR:
386  case TAG_TR:
387  case TAG_SLASHDIV:
388  case TAG_SLASHH:
389  dummy = 0;
390  newline();
391  break;
  /* List items start on a new line and are prefixed with "* ". */
392  case TAG_LI:
393  newline();
394  add_char(ISO_asterisk);
395  add_char(ISO_space);
396  break;
  /* Content of these elements is not rendered: do_word() drops words
     while the parser is in MAJORSTATE_DISCARD. */
397  case TAG_SCRIPT:
398  case TAG_STYLE:
399  case TAG_SELECT:
400  switch_majorstate(MAJORSTATE_DISCARD);
401  break;
  /* At the closing tag, let do_word() handle what is pending and go
     back to the major state that was active before. */
402  case TAG_SLASHSCRIPT:
403  case TAG_SLASHSTYLE:
404  case TAG_SLASHSELECT:
405  do_word();
406  switch_majorstate(s.lastmajorstate);
407  break;
  /* <body> sets both the current and the remembered state directly,
     bypassing switch_majorstate(). */
408  case TAG_BODY:
409  s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
410  break;
  /* A frame with a non-empty src attribute is rendered on its own line
     as "[", a link labelled with the frame tag name, and "]". */
411  case TAG_FRAME:
412  if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 && s.tagattrparam[0] != 0) {
413  switch_majorstate(MAJORSTATE_BODY);
414  newline();
415  add_char(ISO_rbrack);
416  do_word();
417  htmlparser_link((char *)html_frame, (unsigned char)strlen(html_frame), s.tagattrparam);
418  PRINTF(("Frame [%s]\n", s.tagattrparam));
419  add_char(ISO_lbrack);
420  newline();
421  }
422  break;
  /* An image with a non-empty alt attribute is rendered as the alt text
     enclosed in '<' and '>'. */
423  case TAG_IMG:
424  if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 && s.tagattrparam[0] != 0) {
425  add_char(ISO_lt);
426  tagattrparam = &s.tagattrparam[0];
427  while(*tagattrparam) {
428  add_char(*tagattrparam);
429  ++tagattrparam;
430  }
431  add_char(ISO_gt);
432  do_word();
433  }
434  break;
  /* <a href="...">: remember the URL and start collecting the link
     text; the link is emitted when </a> is seen. */
435  case TAG_A:
436  PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
437  if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 && s.tagattrparam[0] != 0) {
438  strcpy(s.linkurl, s.tagattrparam);
439  do_word();
440  switch_majorstate(MAJORSTATE_LINK);
441  }
442  break;
  /* </a>: hand the accumulated link text and the remembered URL to
     htmlparser_link(), then return to the previous major state. */
443  case TAG_SLASHA:
444  if(s.majorstate == MAJORSTATE_LINK) {
445  switch_majorstate(s.lastmajorstate);
446  s.word[s.wordlen] = 0;
447  htmlparser_link(s.word, s.wordlen, s.linkurl);
448  s.wordlen = 0;
449  }
450  break;
451 #if WWW_CONF_FORMS
452  case TAG_FORM:
453  /* First check if we are called at the end of a form tag. If
454  so, we should propagate the form action. */
455  if(s.tagattr[0] == 0 && s.formaction[0] != 0) {
456  htmlparser_form(s.formaction);
457  init_input();
458  } else {
459  PRINTF(("Form tag\n"));
460  switch_majorstate(MAJORSTATE_FORM);
461  if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
462  PRINTF(("Form action '%s'\n", s.tagattrparam));
463  strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
464  }
465  }
466  break;
467  case TAG_SLASHFORM:
468  switch_majorstate(MAJORSTATE_BODY);
469  s.formaction[0] = 0;
470  break;
  /* <input> is only honoured inside a form. Attributes are collected
     one call at a time; the widget is rendered on the final call. */
471  case TAG_INPUT:
472  if(s.majorstate == MAJORSTATE_FORM) {
473  /* First check if we are called at the end of an input tag. If
474  so, we should render the input widget. */
475  if(s.tagattr[0] == 0 && s.inputname[0] != 0) {
476  PRINTF(("Render input type %d\n", s.inputtype));
477  switch(s.inputtype) {
478  case HTMLPARSER_INPUTTYPE_NONE:
479  case HTMLPARSER_INPUTTYPE_TEXT:
480  case HTMLPARSER_INPUTTYPE_HIDDEN:
481  htmlparser_inputfield(s.inputtype, s.inputvaluesize, s.inputvalue, s.inputname);
482  break;
483  case HTMLPARSER_INPUTTYPE_SUBMIT:
484  case HTMLPARSER_INPUTTYPE_IMAGE:
485  htmlparser_submitbutton(s.inputvalue, s.inputname);
486  break;
487  }
488  init_input();
489  } else {
490  PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
491  if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
492  if(strncmp(s.tagattrparam, html_submit, sizeof(html_submit)) == 0) {
493  s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
494  } else if(strncmp(s.tagattrparam, html_image, sizeof(html_image)) == 0) {
495  s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
496  } else if(strncmp(s.tagattrparam, html_text, sizeof(html_text)) == 0) {
497  s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
498  } else if(strncmp(s.tagattrparam, html_hidden, sizeof(html_hidden)) == 0) {
499  s.inputtype = HTMLPARSER_INPUTTYPE_HIDDEN;
500  } else {
501  s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
502  }
503  } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
504  strncpy(s.inputname, s.tagattrparam, WWW_CONF_MAX_INPUTNAMELEN);
  /* For image inputs the alt text doubles as the button value. */
505  } else if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
506  s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
507  strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN);
508  } else if(strncmp(s.tagattr, html_value, sizeof(html_value)) == 0) {
509  strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN);
  /* size: only the first one or two decimal digits are read; the result
     is capped below WWW_CONF_MAX_INPUTVALUELEN. A value that does not
     start with a digit yields size 0. */
510  } else if(strncmp(s.tagattr, html_size, sizeof(html_size)) == 0) {
511  size = 0;
512  if(s.tagattrparam[0] >= '0' &&
513  s.tagattrparam[0] <= '9') {
514  size = s.tagattrparam[0] - '0';
515  if(s.tagattrparam[1] >= '0' &&
516  s.tagattrparam[1] <= '9') {
517  size = size * 10 + (s.tagattrparam[1] - '0');
518  }
519  }
520  if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
521  size = WWW_CONF_MAX_INPUTVALUELEN - 1;
522  }
523  s.inputvaluesize = size;
524  }
525  }
526  }
527  break;
528 #endif /* WWW_CONF_FORMS */
529  }
530 }
531 /*-----------------------------------------------------------------------------------*/
532 static uint16_t CC_FASTCALL
533 parse_word(char *data, uint8_t dlen)
534 {
535  static uint8_t i;
536  static uint8_t len;
537  unsigned char c;
538 
539  len = dlen;
540 
541  switch(s.minorstate) {
542  case MINORSTATE_TEXT:
543  for(i = 0; i < len; ++i) {
544  c = data[i];
545  if(iswhitespace(c)) {
546  do_word();
547  } else if(c == ISO_lt) {
548  s.minorstate = MINORSTATE_TAG;
549  s.tagptr = 0;
550  break;
551  } else if(c == ISO_ampersand) {
552  s.minorstate = MINORSTATE_EXTCHAR;
553  break;
554  } else {
555  add_char(c);
556  }
557  }
558  break;
559  case MINORSTATE_EXTCHAR:
560  for(i = 0; i < len; ++i) {
561  c = data[i];
562  if(c == ISO_semicolon) {
563  s.minorstate = MINORSTATE_TEXT;
564  add_char(' ');
565  break;
566  } else if(iswhitespace(c)) {
567  s.minorstate = MINORSTATE_TEXT;
568  add_char('&');
569  add_char(' ');
570  break;
571  }
572  }
573  break;
574  case MINORSTATE_TAG:
575  /* We are currently parsing within the name of a tag. We check
576  for the end of a tag (the '>' character) or whitespace (which
577  indicates that we should parse a tag attr argument
578  instead). */
579  for(i = 0; i < len; ++i) {
580  c = data[i];
581  if(c == ISO_gt) {
582  /* Full tag found. We continue parsing regular text. */
583  s.minorstate = MINORSTATE_TEXT;
584  s.tagattrptr = s.tagattrparamptr = 0;
585  endtagfound();
586  parse_tag();
587  break;
588  } else if(iswhitespace(c)) {
589  /* The name of the tag found. We continue parsing the tag
590  attr.*/
591  s.minorstate = MINORSTATE_TAGATTR;
592  s.tagattrptr = 0;
593  endtagfound();
594  break;
595  } else {
596  /* Keep track of the name of the tag, but convert it to
597  lower case. */
598  s.tag[s.tagptr] = lowercase(c);
599  ++s.tagptr;
600  /* Check if the ->tag field is full. If so, we just eat up
601  any data left in the tag. */
602  if(s.tagptr == sizeof(s.tag)) {
603  s.minorstate = MINORSTATE_TAGEND;
604  break;
605  }
606  }
607 
608  /* Check for HTML comment, indicated by <!-- */
609  if(s.tagptr == 3 &&
610  s.tag[0] == ISO_bang &&
611  s.tag[1] == ISO_dash &&
612  s.tag[2] == ISO_dash) {
613  PRINTF(("Starting comment...\n"));
614  s.minorstate = MINORSTATE_HTMLCOMMENT;
615  s.tagptr = 0;
616  endtagfound();
617  break;
618  }
619  }
620  break;
621  case MINORSTATE_TAGATTR:
622  /* We parse the "tag attr", i.e., the "href" in <a
623  href="...">. */
624  for(i = 0; i < len; ++i) {
625  c = data[i];
626  if(c == ISO_gt) {
627  /* Full tag found. */
628  s.minorstate = MINORSTATE_TEXT;
629  s.tagattrparamptr = 0;
630  s.tagattrptr = 0;
631  endtagfound();
632  parse_tag();
633  s.tagptr = 0;
634  endtagfound();
635  break;
636  } else if(iswhitespace(c)) {
637  if(s.tagattrptr == 0) {
638  /* Discard leading spaces. */
639  } else {
640  /* A non-leading space is the end of the attribute. */
641  s.tagattrparamptr = 0;
642  endtagfound();
643  parse_tag();
644  s.minorstate = MINORSTATE_TAGATTRSPACE;
645  break;
646  }
647  } else if(c == ISO_eq) {
648  s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
649  s.tagattrparamptr = 0;
650  endtagfound();
651  break;
652  } else {
653  s.tagattr[s.tagattrptr] = lowercase(c);
654  ++s.tagattrptr;
655  /* Check if the "tagattr" field is full. If so, we just eat
656  up any data left in the tag. */
657  if(s.tagattrptr == sizeof(s.tagattr)) {
658  s.minorstate = MINORSTATE_TAGEND;
659  break;
660  }
661  }
662  }
663  break;
664  case MINORSTATE_TAGATTRSPACE:
665  for(i = 0; i < len; ++i) {
666  c = data[i];
667  if(iswhitespace(c)) {
668  /* Discard spaces. */
669  } else if(c == ISO_eq) {
670  s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
671  s.tagattrparamptr = 0;
672  endtagfound();
673  parse_tag();
674  break;
675  } else {
676  s.tagattr[0] = lowercase(c);
677  s.tagattrptr = 1;
678  s.minorstate = MINORSTATE_TAGATTR;
679  break;
680  }
681  }
682  break;
683  case MINORSTATE_TAGATTRPARAMNQ:
684  /* We are parsing the "tag attr parameter", i.e., the link part
685  in <a href="link">. */
686  for(i = 0; i < len; ++i) {
687  c = data[i];
688  if(c == ISO_gt) {
689  /* Full tag found. */
690  endtagfound();
691  parse_tag();
692  s.minorstate = MINORSTATE_TEXT;
693  s.tagattrptr = 0;
694  endtagfound();
695  parse_tag();
696  s.tagptr = 0;
697  endtagfound();
698  break;
699  } else if(iswhitespace(c) && s.tagattrparamptr == 0) {
700  /* Discard leading spaces. */
701  } else if((c == ISO_citation ||
702  c == ISO_citation2) && s.tagattrparamptr == 0) {
703  s.minorstate = MINORSTATE_TAGATTRPARAM;
704  s.quotechar = c;
705  PRINTF(("tag attr param q found\n"));
706  break;
707  } else if(iswhitespace(c)) {
708  PRINTF(("Non-leading space found at %d\n", s.tagattrparamptr));
709  /* Stop parsing if a non-leading space was found */
710  endtagfound();
711  parse_tag();
712 
713  s.minorstate = MINORSTATE_TAGATTR;
714  s.tagattrptr = 0;
715  endtagfound();
716  break;
717  } else {
718  s.tagattrparam[s.tagattrparamptr] = c;
719  ++s.tagattrparamptr;
720  /* Check if the "tagattr" field is full. If so, we just eat
721  up any data left in the tag. */
722  if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
723  s.minorstate = MINORSTATE_TAGEND;
724  break;
725  }
726  }
727  }
728  break;
729  case MINORSTATE_TAGATTRPARAM:
730  /* We are parsing the "tag attr parameter", i.e., the link
731  part in <a href="link">. */
732  for(i = 0; i < len; ++i) {
733  c = data[i];
734  if(c == s.quotechar) {
735  /* Found end of tag attr parameter. */
736  endtagfound();
737  parse_tag();
738 
739  s.minorstate = MINORSTATE_TAGATTR;
740  s.tagattrptr = 0;
741  endtagfound();
742  break;
743  } else {
744  if(iswhitespace(c)) {
745  s.tagattrparam[s.tagattrparamptr] = ISO_space;
746  } else {
747  s.tagattrparam[s.tagattrparamptr] = c;
748  }
749 
750  ++s.tagattrparamptr;
751  /* Check if the "tagattr" field is full. If so, we just eat
752  up any data left in the tag. */
753  if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
754  s.minorstate = MINORSTATE_TAGEND;
755  break;
756  }
757  }
758  }
759  break;
760  case MINORSTATE_HTMLCOMMENT:
761  for(i = 0; i < len; ++i) {
762  c = data[i];
763  if(c == ISO_dash) {
764  ++s.tagptr;
765  } else if(c == ISO_gt && s.tagptr > 0) {
766  PRINTF(("Comment done.\n"));
767  s.minorstate = MINORSTATE_TEXT;
768  break;
769  } else {
770  s.tagptr = 0;
771  }
772  }
773  break;
774  case MINORSTATE_TAGEND:
775  /* Discard characters until a '>' is seen. */
776  for(i = 0; i < len; ++i) {
777  if(data[i] == ISO_gt) {
778  s.minorstate = MINORSTATE_TEXT;
779  s.tagattrptr = 0;
780  endtagfound();
781  parse_tag();
782  break;
783  }
784  }
785  break;
786  default:
787  i = 0;
788  break;
789  }
790  if(i >= len) {
791  return len;
792  }
793  return i + 1;
794 }
795 /*-----------------------------------------------------------------------------------*/
/*
 * Parse datalen bytes of HTML starting at data.
 *
 * The buffer is fed to parse_word() in pieces of at most 255 bytes
 * (its length argument is a uint8_t). parse_word() reports how many
 * bytes it consumed, and parsing continues from there until the buffer
 * is exhausted. Parser state is kept between calls, so a document may
 * be passed in arbitrary pieces.
 */
void
htmlparser_parse(char *data, uint16_t datalen)
{
  uint16_t consumed;
  uint8_t chunk;

  while(datalen > 0) {
    chunk = datalen > 255 ? 255 : (uint8_t)datalen;
    consumed = parse_word(data, chunk);
    data += consumed;
    datalen -= consumed;
  }
}
811 /*-----------------------------------------------------------------------------------*/
#define CC_FASTCALL
Configure if the C compiler supports fastcall function declarations.
Definition: cc.h:77