/*
* uniqx ver.1.0
* date: 2005/02/03
*
* Kenar (Kenji Arisawa)
* E-mail: arisawa@aichi-u.ac.jp
*/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
/*
 * Read the next '\n'-delimited line from the input Biobuf `in`.
 * Per bio(2), Brdstr returns a malloc'd buffer and, because the third
 * argument is non-zero, overwrites the delimiter with a NUL.
 * main treats a nil result as end of input.
 */
#define gline() Brdstr(&in, '\n', 1)
/*
 * Capacity of the per-line field arrays in F_equal; a line whose
 * field count reaches this value is rejected there with sysfatal.
 */
#define MFIELD 128
typedef struct Ranges Ranges;
struct Ranges{
int n;
int m;
Ranges *next;
};
/* Head of the list of key ranges; filled by main from the -k argument. */
Ranges *ranges;
/* Buffered input (the file argument, or fd 0) and buffered output (fd 1); both initialised in main. */
Biobuf in;
Biobuf out;
/* Number of input lines in the group currently being accumulated; read by pline. */
int count=0;
/* -c: pline prefixes each printed line with its count. */
int cflag=0;
/* -u: pline prints only lines whose count is 1. */
int uflag=0;
/* -d: pline prints only lines whose count is not 1. */
int dflag=0;
/* -r: main keeps the last line of a group instead of the first. */
int rflag=0;
/* -f: main selects F_equal (field-wise) instead of C_equal (character-wise). */
int fflag=0;
/* Print one group representative, subject to the -u/-d/-c flags. */
void pline(char *bp);
/* Comparison routines: return non-zero when b1 and b2 agree on every range in the list. */
int C_equal(Ranges *ranges, char *b1, char *b2);
int F_equal(Ranges *ranges, char *b1, char *b2);
/* Comparison routine in effect; main points it at C_equal or F_equal. */
int (*equal)(Ranges *ranges, char *b1, char *b2);
/*
 * Append a new range [n, m) to the end of the list whose head
 * pointer is *r.  When the list is empty (*r is nil) the new node
 * becomes the head.  Terminates the program via sysfatal if the
 * node cannot be allocated.
 */
void
appendRange(Ranges **r, int n, int m)
{
	Ranges *p;

	p = malloc(sizeof *p);
	if(p == nil)
		sysfatal("# out of memory: %r");
	p->n = n;
	p->m = m;
	p->next = nil;

	/* advance r to the nil link that terminates the list, then hang p there */
	while(*r != nil)
		r = &(*r)->next;
	*r = p;
}
/*
 * Print the command synopsis and the key syntax to fd 2, then
 * terminate the program with exit status "usage".
 */
void
usage(void)
{
	fprint(2,
		"usage:\n"
		"uniqx [-udcrf] [-k key,key,...] [file]\n"
		"-udc flag is same as that of uniq.\n"
		"-r flag is `print last line of duplicated line'\n"
		"-f flag means field wise comparison.\n"
		"key follows Python syntax of range;\n"
		"that is, one of `n:m' `:m' `n:' `:'\n"
		"n begins with 0\n"
		"default is: uniqx -k 0:\n");
	exits("usage");
}
/*
 * Entry point.
 *
 * Parses the option flags, turns the -k argument (default "0:") into
 * the global list of ranges, opens the optional input file (fd 0 when
 * absent) and then collapses runs of adjacent lines that `equal`
 * reports as matching.  One line per run is handed to pline: the first
 * line of the run, or the last one when -r is given.
 *
 * Lines returned by gline() are malloc'd by Brdstr (see bio(2)), so
 * this function owns them and frees each one once it is no longer the
 * current representative.
 */
void
main(int argc, char *argv[])
{
	/* writable storage: getfields() writes NULs into the key string */
	static char defkeys[] = "0:";
	int fd, n, m;
	char *file;
	char *keys = nil;
	char *s, c;
	char *range[2];
	int nrange;
	char *b1;
	char *b2;

	ARGBEGIN{
	case 'c':
		cflag = 1;
		break;
	case 'd':
		dflag = 1;
		break;
	case 'f':
		fflag = 1;
		break;
	case 'k':
		keys = ARGF();
		if(keys == nil)
			usage();
		break;
	case 'r':
		rflag = 1;
		break;
	case 'u':
		uflag = 1;
		break;
	default:
		usage();
	}ARGEND

	if(uflag && dflag)
		sysfatal("# incompatible flag -u and -d");
	if(keys == nil)
		keys = defkeys;
	equal = C_equal;
	if(fflag)
		equal = F_equal;

	/* split the key list at commas; each key is `n', `n:m', `:m', `n:' or `:' */
	for(;;){
		for(s = keys; *s && *s != ','; s++)
			;
		c = *s;		/* remember whether another key follows */
		if(*s == ',')
			*s = 0;
		nrange = getfields(keys, range, 2, 0, ":");
		n = 0;
		m = 0;
		if(nrange == 1){
			/* a bare number selects the single position n */
			n = atoi(range[0]);
			m = n + 1;
		}
		if(nrange == 2){
			/* an empty side converts to 0; m == 0 means "to the end" */
			n = atoi(range[0]);
			m = atoi(range[1]);
		}
		if(n < 0)
			sysfatal("# field out of range");
		appendRange(&ranges, n, m);
		if(c == 0)
			break;
		keys = ++s;
	}

	/* at most one file operand is accepted */
	file = *argv;
	if(file != nil && argv[1] != nil)
		usage();
	fd = 0;
	if(file != nil){
		fd = open(file, OREAD);
		if(fd < 0)
			sysfatal("# cannot open %s: %r", file);
	}
	Binit(&in, fd, OREAD);
	Binit(&out, 1, OWRITE);

	/* b1 is the representative of the current run; count is the run length */
	if((b1 = gline()) == nil)
		exits(0);
	count = 1;
	for(;;){
		if((b2 = gline()) == nil){
			pline(b1);
			free(b1);
			/* NOTE(review): `out` is not flushed explicitly; presumably libbio flushes write buffers at exit - confirm */
			exits(0);
		}
		if(!equal(ranges, b1, b2)){
			/* run ended: emit it and start a new run with b2 */
			pline(b1);
			free(b1);
			b1 = b2;
			count = 1;
			continue;
		}
		if(rflag){
			/* -r: the most recent line becomes the representative */
			free(b1);
			b1 = b2;
		}else
			free(b2);
		count++;
	}
}
/*
 * Write the line bp, followed by a newline, to the output buffer,
 * honouring the global option flags:
 *   -u  suppresses lines whose run length (count) is not 1,
 *   -d  suppresses lines whose run length is 1,
 *   -c  prefixes the line with the run length.
 */
void
pline(char *bp)
{
	int repeated;

	repeated = (count != 1);
	if((uflag && repeated) || (dflag && !repeated))
		return;
	if(cflag)
		Bprint(&out, "%4d ", count);
	Bprint(&out, "%s\n", bp);
}
/*
 * Field-wise comparison of two lines.
 *
 * Both lines are copied and split into fields with tokenize(); the
 * originals are left untouched.  For every range [n, m) in the list
 * (m == 0, or m beyond the longer line, means "up to the last field
 * of the longer line") the fields with those indices must be equal
 * strings.  A field index that exists in only one of the two lines
 * counts as a mismatch.
 *
 * Returns 1 when the lines agree on every range, 0 otherwise.
 * Terminates via sysfatal when a line has MFIELD or more fields or
 * when the working copies cannot be allocated.
 */
int
F_equal(Ranges *ranges, char *b1, char *b2)
{
	char *tb1[MFIELD], *tb2[MFIELD];
	char *bb1, *bb2;
	int n1, n2, min, max;
	int i, n, m;
	int status;

	/* tokenize() writes into its argument, so work on private copies */
	bb1 = strdup(b1);
	bb2 = strdup(b2);
	if(bb1 == nil || bb2 == nil)
		sysfatal("# out of memory: %r");
	n1 = tokenize(bb1, tb1, MFIELD);
	n2 = tokenize(bb2, tb2, MFIELD);
	min = (n1 < n2) ? n1 : n2;
	max = (n1 < n2) ? n2 : n1;
	if(max == MFIELD)
		sysfatal("# fields over limit %d", MFIELD);

	status = 1;
	while(ranges){
		n = ranges->n;
		m = ranges->m;
		if(m == 0 || m > max)
			m = max;
		if(n < min){
			for(i = n; i < m; i++){
				/*
				 * Only indices below min are filled in both arrays;
				 * from min on, the field exists in just one line,
				 * so the lines differ and the slot must not be read.
				 */
				if(i >= min || strcmp(tb1[i], tb2[i]) != 0){
					status = 0;
					goto Done;
				}
			}
		}
		else if(n < max){
			/* the range starts past the end of the shorter line only */
			status = 0;
			goto Done;
		}
		ranges = ranges->next;
	}
Done:
	free(bb1);
	free(bb2);
	return status;
}
/*
 * Character-wise comparison of two lines.
 *
 * For every range [n, m) in the list (m == 0, or m beyond the longer
 * line, means "up to the end of the longer line") the characters at
 * those positions are compared with strncmp.  A range that starts
 * inside the longer line but at or past the end of the shorter one
 * is a mismatch; a range that starts past the end of both lines is
 * ignored.
 *
 * Returns 1 when the lines agree on every range, 0 otherwise.
 */
int
C_equal(Ranges *ranges, char *b1, char *b2)
{
	Ranges *r;
	int len1, len2, shorter, longer;
	int from, to;

	len1 = strlen(b1);
	len2 = strlen(b2);
	if(len1 < len2){
		shorter = len1;
		longer = len2;
	}else{
		shorter = len2;
		longer = len1;
	}

	for(r = ranges; r; r = r->next){
		from = r->n;
		to = r->m;
		if(to == 0 || to > longer)
			to = longer;
		if(from >= shorter){
			/* nothing to compare in the shorter line */
			if(from < longer)
				return 0;
			continue;
		}
		if(strncmp(b1+from, b2+from, to-from) != 0)
			return 0;
	}
	return 1;
}
|