//
// aegis - project change supervisor
// Copyright (C) 2002-2008 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see
// <http://www.gnu.org/licenses/>.
//
#include
#include
#include
#include
#include
// When true, check() reports problems but does not call fatal_raw for them.
bool warning;
// Maximum permitted line width; set per file by check().
static int limit;
// Number of consecutive empty lines seen most recently; reset to zero by
// any line that has content.
static int number_of_blank_lines;
// Number of problems reported so far for the current file.
static int number_of_errors;
// Number of the line currently being read (1-based).
static int line_number;
// Set when a CR LF line terminator is seen in the current file.
static bool dos_format;
// Set when a NUL byte is seen in the current file.
static bool binary_format;
// Stream for the file currently being checked.
static FILE *fp;
// Name of the file currently being checked, used in diagnostics.
static const char *fn;
// File classification, initially derived from the file name suffix.
// A header may be reclassified as C or C++ once a comment is seen.
static bool isa_c_file;
static bool isa_cxx_file;
static bool isa_h_file;
// When non-zero, unprintable characters are not reported.
static int unprintable_ok;
//
// The states of the comment-tracking state machine driven by
// run_state_machine.
//
enum state_t
{
// Ordinary code: not in a string, character constant or comment.
state_normal,
// Inside a string constant, waiting for the closing double quote.
state_double_quote,
// Just seen a backslash inside a string constant.
state_double_quote_escape,
// Inside a character constant, waiting for the closing single quote.
state_single_quote,
// Just seen a backslash inside a character constant.
state_single_quote_escape,
// Seen a '/', which may start a comment or be a division operator.
state_slash,
// Inside a C++ comment, waiting for the newline.
state_cxx_comment,
// Inside the body of a C comment.
state_c_comment,
// Seen '/' and '*'; the next character decides whether this might be
// a Doxygen comment.
state_c_comment_begin,
// Seen '/', '*' and '*'.
state_c_comment_doxygen,
// Inside a C comment, having just seen a '*' that may precede the
// closing '/'.
state_c_comment_star
};
// Current state of the state machine for the file being checked.
static state_t state;
//
// Called when the state machine has recognised an ordinary C comment.
//
// A header file that has not yet been marked as C is marked as C from
// here on, and stops being treated as a header.  If the file is marked
// as C++, the comment is reported as an error against the current line.
//
static void
check_c_comment(void)
{
    const bool undecided_header = isa_h_file && !isa_c_file;
    if (undecided_header)
    {
        isa_c_file = true;
        isa_h_file = false;
    }

    // Nothing to complain about unless this is a C++ file.
    if (!isa_cxx_file)
        return;

    ++number_of_errors;
    fprintf(stderr, "%s: %d: C comment in a C++ file\n", fn, line_number);
}
//
// The run_state_machine function is used to track the C and C++
// comment state of the file. This allows us to generate warnings
// about inappropriate comments.
//
// All C++ compilers accept C comments, and many C compilers accept C++
// comments (and a recent standard extension permits them).
// However, file_check warns about cross-language comments.
//
// The rationale of this is that the comments give the human reader
// an important clue as to which language they should expect to
// be reading. While the semantics of C and C++ are
// obviously different in many cases, in some cases they are merely
// subtly different. Not all C code works identically in C++.
// Consistent use of the appropriate comment form gives the human reader
// an obvious reminder.
//
// Exception: The Doxygen and DOC++ comment form is / * * blah * /,
// and this is accepted in C++, mostly because Doxygen and DOC++ don't
// treat runs of /// as a single block comment. Usually such comments
// are isolated in the obviously different class declarations, so should
// not be an issue.
//
//
// Advance the comment-tracking state machine by one input character.
//
// c is the character just read from the file being checked.  Nothing
// happens unless the file is classified as a C, C++ or header file.
// Inappropriate comments are reported on stderr and counted in
// number_of_errors.
//
static void
run_state_machine(int c)
{
    if (!isa_c_file && !isa_cxx_file && !isa_h_file)
        return;
    switch (state)
    {
    case state_normal:
        //
        // This state is for the body of a C or C++ file.  We aren't in
        // a string, or a character constant, or any kind of comment.
        //
        switch (c)
        {
        case '/':
            state = state_slash;
            break;

        case '\'':
            state = state_single_quote;
            break;

        case '"':
            state = state_double_quote;
            break;

        default:
            break;
        }
        break;

    case state_slash:
        //
        // In this state we have seen a slash.  It could be the start
        // of a C or C++ comment, or just one of the division operators.
        //
        switch (c)
        {
        case '/':
            //
            // We have seen the start of a C++ comment.
            //
            state = state_cxx_comment;
            if (isa_h_file && !isa_cxx_file)
            {
                //
                // A header whose language was undecided is C++ from
                // here on.  It must stop being an undecided header,
                // otherwise a later C comment would also mark it as C
                // (see check_c_comment) and every subsequent comment
                // of either kind would be reported.
                //
                isa_h_file = false;
                isa_cxx_file = true;
            }
            if (isa_c_file)
            {
                fprintf(stderr, "%s: %d: C++ comment in a C file\n",
                    fn, line_number);
                ++number_of_errors;
            }
            break;

        case '*':
            //
            // We have seen the start of a C comment, but it could be
            // a Doxygen introducer, so we can't whine if it's a C++
            // file just yet.
            //
            state = state_c_comment_begin;
            break;

        case '\'':
            //
            // A division operator immediately followed by a character
            // constant.  The quote must not be lost, or the contents
            // of the constant would be scanned as ordinary code.
            //
            state = state_single_quote;
            break;

        case '"':
            //
            // A division operator immediately followed by a string
            // constant (not valid code, but keep the machine in step).
            //
            state = state_double_quote;
            break;

        default:
            //
            // One of the division operators.
            // Go back to scanning ordinary code.
            //
            state = state_normal;
            break;
        }
        break;

    case state_double_quote:
        //
        // In this state we have seen a double quote, and possibly
        // some content.  We are waiting for the closing double quote.
        //
        switch (c)
        {
        case '\\':
            //
            // Start of an escape sequence.
            //
            state = state_double_quote_escape;
            break;

        case '"':
        case '\n':
            //
            // Normal and abnormal string constant termination.
            //
            state = state_normal;
            break;

        default:
            //
            // Still in the string.  No need to change state.
            //
            break;
        }
        break;

    case state_double_quote_escape:
        //
        // We throw away the character immediately following the
        // backslash.  Escape sequences can be longer than this, but
        // are uninteresting to the state machine.  The only sequences
        // which can confuse the state machine are escaped backslash,
        // escaped double quote and escaped newline.
        //
        state = state_double_quote;
        break;

    case state_single_quote:
        //
        // In this state we have seen a single quote, and possibly
        // some content.  We are waiting for the closing single quote.
        //
        switch (c)
        {
        case '\\':
            //
            // Start of an escape sequence.
            //
            state = state_single_quote_escape;
            break;

        case '\'':
        case '\n':
            //
            // Normal and abnormal character constant termination.
            //
            state = state_normal;
            break;

        default:
            //
            // Still in the character constant.  No need to change state.
            //
            break;
        }
        break;

    case state_single_quote_escape:
        //
        // We throw away the character immediately following the
        // backslash.  Escape sequences can be longer than this, but
        // are uninteresting to the state machine.  The only sequences
        // which can confuse the state machine are escaped backslash,
        // escaped single quote and escaped newline.
        //
        state = state_single_quote;
        break;

    case state_cxx_comment:
        //
        // We have seen '/', '/', and possibly some content.
        // We are waiting for the newline which finishes the comment.
        //
        if (c == '\n')
            state = state_normal;
        break;

    case state_c_comment_begin:
        //
        // We have seen '/' and '*'.  We are waiting for '*' which could
        // start a Doxygen comment, or anything else which indicates
        // the start of a normal C comment.
        //
        if (c == '*')
            state = state_c_comment_doxygen;
        else
        {
            check_c_comment();
            state = state_c_comment;
        }
        break;

    case state_c_comment_doxygen:
        //
        // We have seen '/', '*' and '*'.
        //
        switch (c)
        {
        case '/':
            //
            // This is the end of a very short normal C comment.
            //
            check_c_comment();
            state = state_normal;
            break;

        case '*':
            //
            // This is the start of a very ugly normal C comment.
            // The '*' just seen could still precede the closing '/'.
            //
            check_c_comment();
            state = state_c_comment_star;
            break;

        default:
            //
            // This is a Doxygen comment.  It is allowed in both C and
            // C++ files, due to the limitations of Doxygen.  Sigh.
            //
            // The character just seen is not a '*', so we are in the
            // comment body; a '/' coming next must not be taken as
            // the end of the comment.
            //
            state = state_c_comment;
            break;
        }
        break;

    case state_c_comment:
        //
        // We are in the body of a C comment.  We are waiting for a '*'
        // which could start the comment terminator.
        //
        if (c == '*')
            state = state_c_comment_star;
        break;

    case state_c_comment_star:
        //
        // We have seen a '*' which could precede a '/' to finish a
        // C comment.
        //
        switch (c)
        {
        case '/':
            //
            // C comment terminator.
            //
            state = state_normal;
            break;

        case '*':
            //
            // Almost.  The next '/' will end the comment.
            //
            break;

        default:
            //
            // No, back to the body of the comment.
            //
            state = state_c_comment;
            break;
        }
        break;
    }
}
//
// Report the problems found on the line that has just been completed,
// and keep the count of consecutive blank lines up to date.
//
// pos is the column reached, unprintable the number of unprintable
// characters seen, white_space the number of white space characters
// at the end of the line, and line_contains_white_space is non-zero
// if the line has any space or tab in it.
//
static void
check_one_line_finish(int pos, int unprintable, int white_space,
    int line_contains_white_space)
{
    if (unprintable && !unprintable_ok)
    {
        fprintf
        (
            stderr,
            "%s: %d: line contains %d unprintable character%s\n",
            fn,
            line_number,
            unprintable,
            (unprintable == 1 ? "" : "s")
        );
        ++number_of_errors;
    }
    if (white_space)
    {
        fprintf
        (
            stderr,
            "%s: %d: white space at end of line\n",
            fn,
            line_number
        );
        ++number_of_errors;
    }

    // A line with no white space at all is never reported as too long.
    if (pos > limit && line_contains_white_space)
    {
        fprintf
        (
            stderr,
            "%s: %d: line too long (by %d)\n",
            fn,
            line_number,
            pos - limit
        );
        ++number_of_errors;
    }
    if (pos)
        number_of_blank_lines = 0;
    else
        ++number_of_blank_lines;
}

//
// Read and check one line of the current file (fp).
//
// Every character is also fed to run_state_machine so that comment
// usage can be checked.
//
// Returns 1 if a line was read (including a final line that lacks a
// newline), or 0 if end of file was reached with nothing left to read.
//
static int
check_one_line(void)
{
    int unprintable = 0;
    int white_space = 0;
    int pos = 0;
    int line_contains_white_space = 0;

    ++line_number;
    for (;;)
    {
        int c = getc(fp);
        if (c == EOF)
        {
            if (ferror(fp))
                nfatal("read %s", fn);
            if (!pos)
                return 0;
            fprintf
            (
                stderr,
                "%s: %d: last line has no newline\n",
                fn,
                line_number
            );
            ++number_of_errors;
            check_one_line_finish
            (
                pos,
                unprintable,
                white_space,
                line_contains_white_space
            );
            return 1;
        }
        run_state_machine(c);
        switch (c)
        {
        case '\f':
            ++pos;
            break;

        case '\r':
            c = getc(fp);
            if (c == EOF)
            {
                // A lone CR at the very end of the file.  The next
                // read sees EOF again and reports the missing newline.
                ++unprintable;
                ++pos;
                break;
            }
            if (c != '\n')
            {
                // A lone CR in the middle of a line.  The character
                // after it is pushed back and handled on the next pass.
                ungetc(c, fp);
                ++unprintable;
                ++pos;
                ++white_space;
                break;
            }
            dos_format = true;

            //
            // The LF of a CR LF pair has been consumed here, so it
            // must be given to the state machine explicitly; otherwise
            // C++ comments and unterminated strings would never see
            // the newline that ends them in a DOS format file.
            //
            run_state_machine(c);
            check_one_line_finish
            (
                pos,
                unprintable,
                white_space,
                line_contains_white_space
            );
            return 1;

        case '\n':
            check_one_line_finish
            (
                pos,
                unprintable,
                white_space,
                line_contains_white_space
            );
            return 1;

        case '\t':
            // Advance to the next multiple of 8.
            pos = (pos + 8) & ~7;
            ++white_space;
            ++line_contains_white_space;
            break;

        case ' ':
            ++pos;
            ++white_space;
            ++line_contains_white_space;
            break;

        default:
            if (c == 0)
                binary_format = true;
            assert(c != EOF);
            if (!isprint((unsigned char)c))
                ++unprintable;
            ++pos;

            // Anything else means earlier white space was not trailing.
            white_space = 0;
            break;
        }
    }
}
//
// Test whether the string haystack starts with the string needle.
// An empty needle always matches.
//
static bool
begins_with(const char *haystack, const char *needle)
{
    // strncmp stops at the end of a shorter haystack and reports a
    // mismatch there, so no separate length comparison is needed.
    const size_t prefix_length = strlen(needle);
    return strncmp(haystack, needle, prefix_length) == 0;
}
//
// Test whether the string haystack finishes with the string needle.
// An empty needle always matches.
//
static bool
ends_with(const char *haystack, const char *needle)
{
    const size_t text_length = strlen(haystack);
    const size_t suffix_length = strlen(needle);

    // A suffix longer than the text can never match.
    if (suffix_length > text_length)
        return false;

    // Compare the tail of the text, which has exactly the suffix's length.
    const char *tail = haystack + (text_length - suffix_length);
    return strcmp(tail, needle) == 0;
}
void
check(const char *file_name)
{
//
// Skip over leading baseline symlinks.
//
const char *short_file_name = file_name;
while (short_file_name[0] == 'b' && short_file_name[1] == 'l')
short_file_name += 2;
if (*short_file_name == '/')
++short_file_name;
else
short_file_name = file_name;
limit = 80;
isa_c_file = ends_with(file_name, ".c") || ends_with(file_name, ".C");
isa_cxx_file =
ends_with(file_name, ".cc") || ends_with(file_name, ".CC") ||
ends_with(file_name, ".cpp") || ends_with(file_name, ".CPP");
isa_h_file = ends_with(file_name, ".h") || ends_with(file_name, ".H");
unprintable_ok =
(
begins_with(short_file_name, "lib/")
&&
strstr(short_file_name, "/LC_MESSAGES/")
);
if (begins_with(short_file_name, "test/") && ends_with(file_name, ".sh"))
{
limit = 510;
unprintable_ok = true;
}
if (ends_with(file_name, ".xml"))
{
limit = 510;
}
state = state_normal;
fn = file_name;
fp = fopen(fn, "r");
if (!fp)
nfatal("open %s", fn);
number_of_errors = 0;
number_of_blank_lines = 0;
line_number = 0;
dos_format = false;
binary_format = false;
while (check_one_line())
;
if (dos_format)
{
fprintf(stderr, "%s: file in DOS format (must use UNIX format)\n", fn);
++number_of_errors;
}
if (number_of_blank_lines > 0)
{
fprintf
(
stderr,
"%s: %d: found %d blank line%s at the end of the file\n",
fn,
(line_number <= 1 ? 1 : line_number - 1),
number_of_blank_lines,
(number_of_blank_lines == 1 ? "" : "s")
);
++number_of_errors;
}
if (binary_format)
{
fprintf
(
stderr,
"%s: file appears to be binary, it needs to be replaced "
"with a plain-text file\n",
fn
);
++number_of_errors;
}
if (number_of_errors > 0 && !warning)
{
fatal_raw
(
"%s: found %d fatal error%s",
fn,
number_of_errors,
(number_of_errors == 1 ? "" : "s")
);
}
if (fclose(fp))
nfatal("close %s", fn);
fn = 0;
fp = 0;
}