//
// aegis - project change supervisor
// Copyright (C) 2004-2006, 2008, 2009, 2012 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
#include
#include
#include
#include
//
// Destructor.
//
// The POSIX pattern buffer is handed to regfree only when the compiled
// flag is set, i.e. only after compile() has had regcomp succeed on it.
//
regular_expression::~regular_expression()
{
    trace(("~regular_expression()\n"));
    if (!compiled)
        return;
    regfree(&preg);
}
//
// Constructor.
//
// a_pattern
//     The pattern text.  It is only remembered here (in lhs); the
//     compiled flag starts out false and nothing is passed to regcomp
//     by this constructor.
// a_icase
//     Remembered in icase; compile() turns it into REG_ICASE.
//
regular_expression::regular_expression(
    const nstring &a_pattern,
    bool a_icase
) :
    lhs(a_pattern),
    icase(a_icase),
    compiled(false)
{
    trace(("regular_expression(lhs = \"%s\")\n", lhs.c_str()));
}
//
// Record a regcomp/regexec failure.
//
// err
//     The non-zero code returned by regcomp or regexec.
//
// regerror renders the code as text into a fixed 100 byte scratch
// buffer, and that text becomes the new value of errstr.
//
void
regular_expression::set_error(int err)
{
    trace(("regular_expression::set_error()\n"));
    char message[100];
    regerror(err, &preg, message, sizeof(message));
    errstr = nstring(message);
}
//
// Compile the pattern on first use.
//
// Returns true when a compiled pattern is available, either from an
// earlier call or because regcomp succeeded just now.  Returns false
// when regcomp fails; the failure text is then left in errstr by
// set_error and the compiled flag stays false.
//
bool
regular_expression::compile()
{
    // Already done by an earlier call.
    if (compiled)
        return true;
    trace(("regular_expression::compile()\n"));

    // Always an extended pattern; case folding only on request.
    const int flags = (icase ? (REG_EXTENDED | REG_ICASE) : REG_EXTENDED);
    const int err = regcomp(&preg, lhs.c_str(), flags);
    if (err != 0)
    {
        set_error(err);
        return false;
    }
    compiled = true;
    return true;
}
//
// Fetch the text of the most recent error.
//
// Returns a null pointer when errstr is empty, otherwise the C string
// held by errstr.
//
const char *
regular_expression::strerror()
    const
{
    return (errstr.empty() ? 0 : errstr.c_str());
}
//
// Test a C string against the pattern and report where it matched.
//
// text
//     The NUL terminated string handed to regexec.
// so
//     Set to the start offset of the whole match (regmatch[0].rm_so).
//     Only written when the function returns true.
// eo
//     Set to the end offset of the whole match (regmatch[0].rm_eo).
//     Only written when the function returns true.
//
// Returns true when regexec reports a match.  Returns false when the
// pattern does not compile (errstr set by compile), when there is no
// match (errstr cleared) or when regexec fails some other way (errstr
// set).
//
bool
regular_expression::matches(const char *text, size_t &so, size_t &eo)
{
    trace(("regular_expression::matches(text = \"%s\")\n{\n", text));
    if (!compile())
    {
        trace(("return false;\n}\n"));
        return false;
    }
    int err = regexec(&preg, text, SIZEOF(regmatch), regmatch, 0);
    if (err)
    {
        // "No match" is an ordinary answer, not an error to be reported.
        if (err == REG_NOMATCH)
            errstr.clear();
        else
            set_error(err);
        trace(("return false;\n}\n"));
        return false;
    }
#ifdef DEBUG
    //
    // regoff_t need not be int (POSIX only requires a signed integer
    // type), so the offsets are cast to agree with the %d conversions.
    // The bound is the same SIZEOF(regmatch) that regexec was given.
    //
    for (size_t j = 0; j < SIZEOF(regmatch); ++j)
    {
        if (regmatch[j].rm_so != -1)
        {
            trace(("regmatch[%d] = {%3d,%3d };\n", (int)j,
                (int)regmatch[j].rm_so, (int)regmatch[j].rm_eo));
        }
    }
#endif
    so = regmatch[0].rm_so;
    eo = regmatch[0].rm_eo;
    trace(("return true;\n}\n"));
    return true;
}
bool
regular_expression::match(const nstring &actual, size_t offset)
{
trace(("regular_expression::match(actual = \"%.*s\" / \"%s\", "
"offset = %ld)\n{\n", (int)offset, actual.c_str(),
actual.c_str() + offset, (long)offset));
if (!compile())
{
trace(("return false;\n}\n"));
return false;
}
int err =
regexec
(
&preg,
actual.c_str() + offset,
SIZEOF(regmatch),
regmatch,
(offset == 0 ? 0 : REG_NOTBOL)
);
if (err)
{
if (err == REG_NOMATCH)
errstr.clear();
else
set_error(err);
trace(("return false;\n}\n"));
return false;
}
#ifdef DEBUG
for (size_t j = 0; j < 10; ++j)
{
if (regmatch[j].rm_so != -1)
{
trace(("regmatch[%d] = {%3d,%3d };\n", (int)j, regmatch[j].rm_so,
regmatch[j].rm_eo));
}
}
#endif
trace(("return true;\n}\n"));
return true;
}
//
// Replace the portions of a string that match the pattern.
//
// rhs
//     The replacement text.  Within it
//         &           inserts the whole matched text,
//         \0 ... \9   insert the text of the corresponding regmatch
//                     entry (an error if that entry did not take part
//                     in the match),
//         \& and \\   insert a literal '&' or '\',
//     and any other escape, or a trailing backslash, is an error.
// actual
//     The text to be searched.
// how_many_times
//     Upper limit on the number of replacements.  Zero or a negative
//     value is turned into actual.size() + 1.
// output
//     Receives the rewritten text.  Only assigned when the function
//     returns true.
//
// Returns true on success, which includes the case where nothing
// matched.  Returns false when matching fails or the replacement text
// is malformed; errstr then holds the reason.
//
bool
regular_expression::match_and_substitute(const nstring &rhs,
    const nstring &actual, long how_many_times, nstring &output)
{
    trace(("regular_expression::match_and_substitute(rhs = \"%s\", "
        "actual = \"%s\", how_many_times = %ld)\n{\n", rhs.c_str(),
        actual.c_str(), how_many_times));
    if (how_many_times <= 0)
        how_many_times = actual.size() + 1;
    nstring_accumulator nsa;
    bool suppress_on_zero = false;
    size_t offset = 0;
    while (offset < actual.size())
    {
        trace(("nsa = \"%.*s\";\n", (int)nsa.size(), nsa.get_data()));
        if (!match(actual, offset))
        {
            // match() leaves errstr empty for a plain "no match".
            if (!errstr.empty())
            {
                trace(("return false\n}\n"));
                return false;
            }
            break;
        }

        //
        // There is a nasty boundary condition: we need to move past the
        // infinite loop of a zero-length match.  (This can happen for
        // non-trivial patterns.)
        //
        // This test occurs immediately after we have seen a match,
        // except at the beginning.  We step over zero length matches by
        // treating the next character as an invariant portion, mostly
        // as if the match was (1,1) instead of (0,0), but don't insert
        // the RHS (which is what the "suppress" name is all about).
        //
        // Stepping over does not count against how_many_times.
        //
        if (regmatch[0].rm_eo == 0 && suppress_on_zero)
        {
            nsa.push_back(actual[offset]);
            ++offset;
            suppress_on_zero = false;
            continue;
        }

        //
        // copy the invariant portion
        //
        if (regmatch[0].rm_so)
            nsa.push_back(actual.c_str() + offset, regmatch[0].rm_so);

        //
        // replace the matched portion with the right hand side
        //
        for (const char *cp = rhs.c_str(); *cp; ++cp)
        {
            switch (*cp)
            {
            default:
                nsa.push_back(*cp);
                break;

            case '&':
                // the whole of the matched text
                nsa.push_back
                (
                    actual.c_str() + offset + regmatch[0].rm_so,
                    regmatch[0].rm_eo - regmatch[0].rm_so
                );
                break;

            case '\\':
                ++cp;
                switch (*cp)
                {
                default:
                    errstr =
                        nstring::format
                        (
                            "Replacement escape \\%c unknown",
                            *cp
                        );
                    trace(("return false;\n}\n"));
                    return false;

                case '&':
                case '\\':
                    nsa.push_back(*cp);
                    break;

                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                case '8': case '9':
                    {
                        // the digit selects the regmatch entry
                        regmatch_t *rm = &regmatch[*cp - '0'];
                        if (rm->rm_so < 0)
                        {
                            errstr =
                                nstring::format
                                (
                                    "Replacement \\%c not available",
                                    *cp
                                );
                            trace(("return false;\n}\n"));
                            return false;
                        }
                        nsa.push_back
                        (
                            actual.c_str() + offset + rm->rm_so,
                            rm->rm_eo - rm->rm_so
                        );
                    }
                    break;

                case '\0':
                    // the backslash was the last character of rhs
                    errstr = "Replacement escape unterminated";
                    trace(("return false;\n}\n"));
                    return false;
                }
                break;
            }
        }

        //
        // Move past the matched portion.
        //
        offset += regmatch[0].rm_eo;

        //
        // A replacement has just been made, so a zero-length match
        // found at the new position must be stepped over rather than
        // substituted (see the boundary condition above).
        //
        suppress_on_zero = true;

        //
        // Limit how many times we go through this loop.
        //
        --how_many_times;
        if (how_many_times <= 0)
            break;
    }

    //
    // Collect the tail-end of the input.
    //
    if (offset < actual.size())
        nsa.push_back(actual.c_str() + offset, actual.size() - offset);

    //
    // Build the answer.
    //
    output = nsa.mkstr();
    trace_nstring(output);
    trace(("return true;\n}\n"));
    return true;
}
// vim: set ts=8 sw=4 et :