bvi/recomp.c

281 lines
5.4 KiB
C

/* recomp - regular expression compiler
*
* NOTE: Edit this file with tabstop=4 !
*
* 1996-01-06 created;
* 2000-04-25 V 1.3.0 beta
* 2000-07-12 V 1.3.0 final
* 2019-01-28 V 1.4.1
*
* Copyright 1996-2019 by Gerhard Buergmann
* gerhard@puon.at
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* See file COPYING for information on distribution conditions.
*/
/* A standard regexp library routine cannot be used here, because \0 is an
* ordinary character in a binary string and must be searchable!
*/
#include "bvi.h"
#include "set.h"
/* Read cursor into the pattern currently being compiled; set by
 * ascii_comp()/hex_comp() and advanced by hexchar(). */
char *poi;
/* Kind of the last successfully compiled pattern: ASCII or HEX. */
int smode;
/* NOTE(review): not referenced in the visible part of this file;
 * presumably used by the search code elsewhere - verify. */
int again = 0;
/* Non-zero: '[', '.' and '*' are special as written in ascii_comp();
 * zero: they must be preceded by a backslash to be special. */
int magic = 1;
/* Non-zero: literal pattern characters are upper-cased with toupper()
 * while compiling. */
int ignore_case = 0;
/* NOTE(review): not referenced in the visible part of this file. */
extern long bytepos;
/* NOTE(review): redundant with the definition of ignore_case above. */
extern int ignore_case;
/* Message handed to emsg() when a character class has no members. */
extern char *emptyclass;
/*
 * Compile an ASCII search pattern into the internal regex byte string.
 *
 *   smem     output buffer; no size is passed in, so the caller has to
 *            supply a buffer that is large enough for the compiled form
 *   pattern  pattern text, terminated by END; the escapes \n \r \t \0 are
 *            rewritten in place inside this buffer
 *
 * Output layout, one element after the other, closed by END:
 *
 *     <ONE|STAR> <count> <count member bytes>
 *
 *   any byte  ('.')     count 0, no members
 *   single character    count 1, followed by the character
 *   class  ('[...]')    first member is '\0' for a plain class or '^' for
 *                       a negated one, then the listed characters
 *
 * STAR is emitted instead of ONE when the element is followed by '*'.
 * With `magic` set, '[', '.' and '*' are special as written; otherwise
 * they have to be written as "\[", "\." and "\*".
 * With `ignore_case` set, literal characters are stored upper-cased.
 * The member count is stored in a single char.
 *
 * Returns 0 on success (smode is set to ASCII) or 1 after reporting the
 * problem through emsg().
 */
int
ascii_comp(smem, pattern)
	char	*smem;
	char	*pattern;
{
	char	*end;
	char	*comp;
	char	*first;
	char	*counter;
	int		lo, hi, ch;
	int		count;
	int		bracket, dot;

	comp = smem;
	poi = pattern;
	while (*poi != END) {
		bracket = FALSE;
		if (magic) {
			if (*poi == '[') bracket = TRUE;
		} else {
			if (*poi == '\\' && *(poi + 1) == '[') {
				bracket = TRUE;
				poi++;
			}
		}
		if (bracket) {
			if (!(end = strchr(poi, ']'))) {
				emsg("Missing ]");
				return 1;
			}
			poi++;
			if (*poi == ']' || (*poi == '^' && *(poi + 1) == ']')) {
				emsg(emptyclass);
				return 1;
			}
			if (magic) {
				if (*(end + 1) == '*') *comp++ = STAR;
				else *comp++ = ONE;
			} else {
				if (*(end + 1) == '\\' && *(end + 2) == '*') *comp++ = STAR;
				else *comp++ = ONE;
			}
			count = 0;
			counter = comp;		/* count byte is filled in below */
			comp++;
			if (*poi != '^') {
				/* plain class: '\0' marker; a '^' is copied by the loop */
				*comp++ = '\0';
				count++;
				first = poi;
			} else {
				first = poi + 1;
			}
			while (end > poi) {
				/*
				 * A '-' denotes a range only between two members.  As the
				 * first or last member it is a literal '-'; otherwise the
				 * range would start at '[' / '^' or the cursor would be
				 * moved beyond the closing ']'.
				 */
				if (*poi == '-' && poi > first && poi + 1 < end) {
					/* compare as unsigned bytes in an int so that the
					 * loop terminates and bytes >= 0x80 are ordered */
					lo = (unsigned char)*(poi - 1);
					hi = (unsigned char)*(poi + 1);
					if (ignore_case) {
						lo = toupper(lo);
						hi = toupper(hi);
					}
					while (lo <= hi) {
						*comp++ = (char)lo++;
						count++;
					}
					poi += 2;	/* skip '-' and the upper bound */
				} else {
					count++;
					ch = (unsigned char)*poi++;
					if (ignore_case)
						*comp++ = (char)toupper(ch);
					else
						*comp++ = (char)ch;
				}
			}
			poi++;				/* skip ']' */
			*counter = count;
		} else {
			dot = FALSE;
			if (magic) {
				if (*poi == '.') dot = TRUE;
			} else {
				if (*poi == '\\' && *(poi + 1) == '.') {
					dot = TRUE;
					poi++;
				}
			}
			/*
			 * Backslash escape.  A backslash that is the very last
			 * character is taken literally, so that the cursor never
			 * moves beyond the end of the pattern.
			 */
			if (*poi == '\\' && *(poi + 1) != END) {
				switch (*(poi + 1)) {
				case 'n': *++poi = '\n'; break;
				case 'r': *++poi = '\r'; break;
				case 't': *++poi = '\t'; break;
				case '0': *++poi = '\0'; break;
				default : ++poi;
				}
			}
			if (magic) {
				if (*(poi + 1) == '*') *comp++ = STAR;
				else *comp++ = ONE;
			} else {
				if (*(poi + 1) == '\\' && *(poi + 2) == '*') *comp++ = STAR;
				else *comp++ = ONE;
			}
			if (dot) {
				*comp++ = 0;
				poi++;
			} else {
				*comp++ = 1;
				ch = (unsigned char)*poi++;
				if (ignore_case)
					*comp++ = (char)toupper(ch);
				else
					*comp++ = (char)ch;
			}
		}
		/* skip the repetition operator that was looked ahead above */
		if (magic) {
			if (*poi == '*') poi++;
		} else {
			if (*poi == '\\' && *(poi + 1) == '*') poi += 2;
		}
	}
	*comp = END;
	smode = ASCII;
	return 0;
}
/*
 * Compile a hex search expression into the internal regex byte string.
 *
 *   smem     output buffer; no size is passed in, so the caller has to
 *            supply a buffer that is large enough for the compiled form
 *   pattern  expression text, terminated by END
 *
 * Accepted elements (blanks and tabs between them are ignored):
 *
 *   hh           one byte given as one or two hex digits
 *   .            any byte
 *   [hh hh-hh]   class of bytes / byte ranges, [^...] for a negated class
 *   "text"       each character of text as a literal byte (upper-cased
 *                when `ignore_case` is set); a missing closing quote is
 *                tolerated and the string then runs to the end
 *   *            after a byte, '.' or class: emit STAR instead of ONE
 *
 * Output layout, one element after the other, closed by END:
 *
 *     <ONE|STAR> <count> <count member bytes>
 *
 * with count 0 for '.', count 1 for a single byte, and for a class the
 * first member being '\0' (plain) or '^' (negated).  The member count is
 * stored in a single char.
 *
 * Returns 0 on success (smode is set to HEX) or 1 after the problem has
 * been reported through emsg().
 */
int
hex_comp(smem, pattern)
	char	*smem;
	char	*pattern;
{
	char	*end;
	char	*comp;
	int		cc, ccm, ch;
	char	*counter;
	int		count, nr;

	comp = smem;
	poi = pattern;
	while (*poi != END) {
		while (*poi == ' ' || *poi == '\t') poi++;
		/* trailing white space is not an element of its own */
		if (*poi == END) break;
		if (*poi == '[') {
			if (!(end = strchr(poi, ']'))) {
				emsg("Missing ]");
				return 1;
			}
			poi++;
			while (*poi == ' ' || *poi == '\t') poi++;
			if (*poi == ']' || (*poi == '^' && *(poi + 1) == ']')) {
				emsg(emptyclass);
				return 1;
			}
			if (*(end + 1) == '*') *comp++ = STAR;
			else *comp++ = ONE;
			count = 1;			/* the marker byte stored next */
			counter = comp;		/* count byte is filled in below */
			comp++;
			if (*poi == '^')
				*comp++ = *poi++;
			else
				*comp++ = '\0';
			while (end > poi) {
				if (*poi == ' ' || *poi == '\t') poi++;
				else if (*poi == '-') {
					/* the range starts at the byte stored last; read it
					 * unsigned so that values >= 0x80 stay positive */
					cc = (unsigned char)*(comp - 1);
					poi++;
					while (*poi == ' ' || *poi == '\t') poi++;
					if ((ccm = hexchar()) < 0) return 1;
					while (cc <= ccm) {
						*comp++ = (char)cc++;
						count++;
					}
					/* hexchar() has already moved poi past the upper
					 * bound, so there is nothing more to skip here */
				} else {
					if ((nr = hexchar()) < 0) return 1;
					count++;
					*comp++ = (char)nr;
				}
			}
			poi++;				/* skip ']' */
			*counter = count;
			if (*poi == '*') poi++;
		} else if (*poi == '"') {
			poi++;
			if (!(end = strchr(poi, '"'))) {
				/* no closing quote: take the rest of the pattern */
				end = poi + strlen(poi);
			}
			while (end > poi) {
				*comp++ = ONE;
				*comp++ = 1;
				ch = (unsigned char)*poi++;
				if (ignore_case)
					*comp++ = (char)toupper(ch);
				else
					*comp++ = (char)ch;
			}
			/* skip the closing quote, but never the end of the pattern */
			if (*poi == '"') poi++;
		} else {
			if (*poi == '.') {
				if (*(poi + 1) == '*') { *comp++ = STAR; poi++; }
				else *comp++ = ONE;
				*comp++ = 0;
				poi++;
			} else {
				if ((nr = hexchar()) < 0) return 1;
				if (*poi == '*') { poi++; *comp++ = STAR; }
				else *comp++ = ONE;
				*comp++ = 1;
				*comp++ = (char)nr;
			}
		}
	}
	*comp = END;
	smode = HEX;
	return 0;
}
/*
 * Read one byte value, written as one or two hex digits, at the global
 * pattern cursor `poi`.
 *
 * On success poi is moved past the digits and past any blanks or tabs
 * that follow them, and the value (0..255) is returned.
 * If poi does not point at a hex digit, an error is reported through
 * emsg(), poi is left unchanged and -1 is returned.
 */
int
hexchar()
{
	int		nr;
	int		digits;
	int		c;

	/* <ctype.h> functions need an unsigned char value, not a plain char */
	if (!isxdigit((unsigned char)*poi)) {
		emsg("Bad hex character@in expression");
		return -1;
	}
	nr = 0;
	for (digits = 0; digits < 2 && isxdigit((unsigned char)*poi); digits++) {
		c = (unsigned char)*poi++;
		if (isdigit(c))
			nr = nr * 16 + (c - '0');
		else
			nr = nr * 16 + (toupper(c) - 'A' + 10);
	}
	while (*poi == ' ' || *poi == '\t') poi++;
	return nr;
}