Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sys/shell: refactor tokenizer code. #12085

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 121 additions & 130 deletions sys/shell/shell.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include "shell.h"
#include "shell_commands.h"

Expand All @@ -45,171 +46,161 @@ static void _putchar(int c) {
#endif
#endif

#ifdef MODULE_SHELL_COMMANDS
#define _builtin_cmds _shell_command_list
#else
#define _builtin_cmds NULL
#endif

static void flush_if_needed(void)
{
#ifdef MODULE_NEWLIB
fflush(stdout);
#endif
}

static shell_command_handler_t find_handler(const shell_command_t *command_list, char *command)
static shell_command_handler_t search_commands(const shell_command_t *entry,
char *command)
{
const shell_command_t *command_lists[] = {
command_list,
#ifdef MODULE_SHELL_COMMANDS
_shell_command_list,
#endif
};
for (; entry->name != NULL; entry++) {
if (strcmp(entry->name, command) == 0) {
return entry->handler;
}
}
return NULL;
}

/* iterating over command_lists */
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) {
static shell_command_handler_t find_handler(const shell_command_t *command_list, char *command)
{
shell_command_handler_t handler = NULL;

const shell_command_t *entry;
if (command_list != NULL) {
handler = search_commands(command_list, command);
}

if ((entry = command_lists[i])) {
/* iterating over commands in command_lists entry */
while (entry->name != NULL) {
if (strcmp(entry->name, command) == 0) {
return entry->handler;
}
else {
entry++;
}
}
}
if (handler == NULL && _builtin_cmds != NULL) {
handler = search_commands(_builtin_cmds, command);
}

return NULL;
return handler;
}

static void print_help(const shell_command_t *command_list)
static void print_commands(const shell_command_t *entry)
{
printf("%-20s %s\n", "Command", "Description");
puts("---------------------------------------");

const shell_command_t *command_lists[] = {
command_list,
#ifdef MODULE_SHELL_COMMANDS
_shell_command_list,
#endif
};
for (; entry->name != NULL; entry++) {
printf("%-20s %s\n", entry->name, entry->desc);
}
}

/* iterating over command_lists */
for (unsigned int i = 0; i < ARRAY_SIZE(command_lists); i++) {
static void print_help(const shell_command_t *command_list)
{
puts("Command Description\n---------------------------------------");

const shell_command_t *entry;
if (command_list != NULL) {
print_commands(command_list);
}

if ((entry = command_lists[i])) {
/* iterating over commands in command_lists entry */
while (entry->name != NULL) {
printf("%-20s %s\n", entry->name, entry->desc);
entry++;
}
}
if (_builtin_cmds != NULL) {
print_commands(_builtin_cmds);
}
}

enum PARSE_STATE {
PARSE_SPACE,
PARSE_UNQUOTED,
PARSE_SINGLEQUOTE,
PARSE_DOUBLEQUOTE,
PARSE_ESCAPE_MASK,
PARSE_UNQUOTED_ESC,
PARSE_SINGLEQUOTE_ESC,
PARSE_DOUBLEQUOTE_ESC,
};

static enum PARSE_STATE escape_toggle(enum PARSE_STATE s)
{
return s ^ PARSE_ESCAPE_MASK;
}

#define SQUOTE '\''
#define DQUOTE '"'
#define ESCAPECHAR '\\'
#define BLANK ' '

static void handle_input_line(const shell_command_t *command_list, char *line)
{
static const char *INCORRECT_QUOTING = "shell: incorrect quoting";

/* first we need to calculate the number of arguments */
unsigned argc = 0;
char *pos = line;
int contains_esc_seq = 0;
while (1) {
if ((unsigned char) *pos > ' ') {
/* found an argument */
if (*pos == '"' || *pos == '\'') {
/* it's a quoted argument */
const char quote_char = *pos;
do {
++pos;
if (!*pos) {
puts(INCORRECT_QUOTING);
return;
}
else if (*pos == '\\') {
/* skip over the next character */
++contains_esc_seq;
++pos;
if (!*pos) {
puts(INCORRECT_QUOTING);
return;
}
continue;
}
} while (*pos != quote_char);
if ((unsigned char) pos[1] > ' ') {
puts(INCORRECT_QUOTING);
return;
int argc = 0;
char *readpos = line;
char *writepos = readpos;
enum PARSE_STATE pstate = PARSE_SPACE;

while (*readpos != '\0') {
Copy link
Member

@jia200x jia200x Aug 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this part too hard to read. Maybe something like?

Suggested change
while (*readpos != '\0') {
while (*readpos != '\0') {
char wordbreak;
switch (pstate) {
case PARSE_SPACE:
if (*readpos != BLANK) {
argc++;
}
if (*readpos == SQUOTE) {
pstate = PARSE_SINGLEQUOTE;
} else if (*readpos == DQUOTE) {
pstate = PARSE_DOUBLEQUOTE;
} else if (*readpos == ESCAPECHAR) {
pstate = PARSE_UNQUOTED_ESC;
} else if (*readpos != BLANK) {
pstate = PARSE_UNQUOTED;
goto write;
}
readpos++;
continue;
case PARSE_UNQUOTED:
wordbreak = BLANK;
goto break_word;
case PARSE_SINGLEQUOTE:
wordbreak = SQUOTE;
goto break_word;
case PARSE_DOUBLEQUOTE:
wordbreak = DQUOTE;
goto break_word;
default: /* QUOTED state */
pstate = escape_toggle(pstate);
goto write;
}
break_word:
if (*readpos == wordbreak) {
pstate = PARSE_SPACE;
*writepos++ = '\0';
goto parse_end;
} else if (*readpos == ESCAPECHAR) {
pstate = escape_toggle(pstate);
goto parse_end;
}
write:
*writepos++ = *readpos;
parse_end:
readpos++;
}

It's a similar goto logic but the labels are not inside the switch.
Also, it seems to reduce the size (native) from:

Building application "default" for "native" with MCU "native".
   text    data     bss     dec     hex filename
  90573    1104   72088  163765   27fb5 /home/jialamos/Development/RIOT/examples/default/bin/native/default.elf

to

Building application "default" for "native" with MCU "native".

   text    data     bss     dec     hex filename
  90557    1104   72088  163749   27fa5 /home/jialamos/Development/RIOT/examples/default/bin/native/default.elf

On samr21-xpro, the build size keeps the same. No idea about others too.

Copy link
Member

@jia200x jia200x Aug 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's a non-goto version:

Suggested change
while (*readpos != '\0') {
for(readpos = line; *readpos != '\0'; readpos++) {
char wordbreak;
switch (pstate) {
case PARSE_SPACE:
if (*readpos != BLANK) {
argc++;
}
if (*readpos == SQUOTE) {
pstate = PARSE_SINGLEQUOTE;
} else if (*readpos == DQUOTE) {
pstate = PARSE_DOUBLEQUOTE;
} else if (*readpos == ESCAPECHAR) {
pstate = PARSE_UNQUOTED_ESC;
} else if (*readpos != BLANK) {
pstate = PARSE_UNQUOTED;
*writepos++ = *readpos;
}
continue;
case PARSE_UNQUOTED:
wordbreak = BLANK;
break;
case PARSE_SINGLEQUOTE:
wordbreak = SQUOTE;
break;
case PARSE_DOUBLEQUOTE:
wordbreak = DQUOTE;
break;
default: /* QUOTED state */
pstate = escape_toggle(pstate);
*writepos++ = *readpos;
continue;
}
if (*readpos == wordbreak) {
pstate = PARSE_SPACE;
*writepos++ = '\0';
} else if (*readpos == ESCAPECHAR) {
pstate = escape_toggle(pstate);
}
}

It's 4 bytes bigger on samr21-xpro

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a similar modification (I'm about to push the fixup). The size does not change in samr21 because the compiler is smart enough to move the code around, so the generated machine code is the same in both cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mmm, hold on, I don't think it improves so much in terms of clarity:

    while (*readpos != '\0') {
        char wordbreak;

        switch (pstate) {
            case PARSE_SPACE:
                if (*readpos != BLANK) {
                    argc++;
                }
                if (*readpos == SQUOTE) {
                    pstate = PARSE_SINGLEQUOTE;
                } else if (*readpos == DQUOTE) {
                    pstate = PARSE_DOUBLEQUOTE;
                } else if (*readpos == ESCAPECHAR) {
                    pstate = PARSE_UNQUOTED_ESC;
                } else if (*readpos != BLANK) {
                    pstate = PARSE_UNQUOTED;
                    goto write_word;
                }
                goto parse_end;
            case PARSE_UNQUOTED:
                wordbreak = BLANK;
                break;
            case PARSE_SINGLEQUOTE:
                wordbreak = SQUOTE;
                break;
            case PARSE_DOUBLEQUOTE:
                wordbreak = DQUOTE;
                break;
            default: /* QUOTED state */
                pstate = escape_toggle(pstate);
                goto write_word;
        }

        if (*readpos == wordbreak) {
            pstate = PARSE_SPACE;
            *writepos++ = '\0';
        } else if (*readpos == ESCAPECHAR) {
            pstate = escape_toggle(pstate);
        } else {
write_word:
            *writepos++ = *readpos;
        }
parse_end:
        readpos++;
    }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could draw the state diagram as a comment before the function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the state diagram would also help

char wordbreak;

switch (pstate) {
case PARSE_SPACE:
if (*readpos != BLANK) {
argc++;
}
}
else {
/* it's an unquoted argument */
do {
if (*pos == '\\') {
/* skip over the next character */
++contains_esc_seq;
++pos;
if (!*pos) {
puts(INCORRECT_QUOTING);
return;
}
}
++pos;
if (*pos == '"') {
puts(INCORRECT_QUOTING);
return;
}
} while ((unsigned char) *pos > ' ');
}

/* count the number of arguments we got */
++argc;
if (*readpos == SQUOTE) {
pstate = PARSE_SINGLEQUOTE;
} else if (*readpos == DQUOTE) {
pstate = PARSE_DOUBLEQUOTE;
} else if (*readpos == ESCAPECHAR) {
pstate = PARSE_UNQUOTED_ESC;
} else if (*readpos != BLANK) {
pstate = PARSE_UNQUOTED;
break;
}
goto parse_end;
case PARSE_UNQUOTED:
wordbreak = BLANK;
goto break_word;
case PARSE_SINGLEQUOTE:
wordbreak = SQUOTE;
goto break_word;
case PARSE_DOUBLEQUOTE:
wordbreak = DQUOTE;
break_word:
if (*readpos == wordbreak) {
pstate = PARSE_SPACE;
*writepos++ = '\0';
} else if (*readpos == ESCAPECHAR) {
pstate = escape_toggle(pstate);
} else {
break;
}
goto parse_end;
default: /* QUOTED state */
pstate = escape_toggle(pstate);
break;
}
*writepos++ = *readpos;
parse_end:
readpos++;
}

/* zero out the current position (space or quotation mark) and advance */
if (*pos > 0) {
*pos = 0;
++pos;
}
else {
break;
}
*writepos = '\0';

if (pstate != PARSE_SPACE && pstate != PARSE_UNQUOTED) {
puts(INCORRECT_QUOTING);
return;
}
if (!argc) {

if (argc == 0) {
return;
}

/* then we fill the argv array */
char *argv[argc + 1];
argv[argc] = NULL;
pos = line;
for (unsigned i = 0; i < argc; ++i) {
while (!*pos) {
++pos;
}
if (*pos == '"' || *pos == '\'') {
++pos;
}
argv[i] = pos;
while (*pos) {
++pos;
}
}
for (char **arg = argv; contains_esc_seq && *arg; ++arg) {
for (char *c = *arg; *c; ++c) {
if (*c != '\\') {
continue;
}
for (char *d = c; *d; ++d) {
*d = d[1];
}
if (--contains_esc_seq == 0) {
break;
}
}
int collected;
char *argv[argc];

readpos = line;
for (collected = 0; collected < argc; collected++) {
argv[collected] = readpos;
readpos += strlen(readpos) + 1;
}

/* then we call the appropriate handler */
Expand Down
5 changes: 5 additions & 0 deletions tests/shell/tests/01-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
('unknown_command', ('shell: command not found: unknown_command')),
('help', EXPECTED_HELP),
('echo a string', ('\"echo\"\"a\"\"string\"')),
('echo "t\e st" "\\"" '"'\\'' a\ b", ('"echo""te st"""""\'""a b"')),
('echo a\\', ('shell: incorrect quoting')),
('echo "', ('shell: incorrect quoting')),
("echo '", ('shell: incorrect quoting')),
('echo "\'" \'"\'', ('"echo""\'""""')),
('ps', EXPECTED_PS),
('garbage1234'+CONTROL_C, ('>')), # test cancelling a line
('help', EXPECTED_HELP),
Expand Down