You are not the first person to run into this problem with `iconv`. Someone has written a Perl script to solve it.
`iconv` doesn't handle large files well. From the glibc source code, in `iconv/iconv_prog.c`:
/* Since we have to deal with
arbitrary encodings we must read the whole text in a buffer and
process it in one step. */
However, for your particular case, it might be better to write your own UTF-8 validator. You could easily distill `iconv -c -f utf8 -t utf8` down to a small C program with a loop that calls `iconv(3)`. Since UTF-8 is modeless and self-synchronizing, you can process it in chunks.
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define BUFSIZE 4096

/*
 * Write exactly len bytes from buf to fd, retrying after short writes and
 * EINTR.  Returns 0 on success, -1 on error with errno set by write(2).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, buf, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Copy STDIN to STDOUT, omitting invalid UTF-8 sequences.
 *
 * Input is processed in BUFSIZE chunks.  Each invalid byte is reported on
 * stderr with its 1-based offset in the input and then skipped.  An incomplete
 * sequence at the end of a chunk is carried over and completed by the next
 * read.
 *
 * Exit status: 1 on a read/write error, 2 on an iconv failure, otherwise the
 * return value of iconv_close().
 */
int main(void)
{
    char ib[BUFSIZE], ob[BUFSIZE];
    size_t iblen = 0;             /* unconverted bytes currently held in ib */
    unsigned long long total = 0; /* input bytes that precede ib[0] */
    iconv_t cd;

    cd = iconv_open("utf8", "utf8");
    if (cd == (iconv_t)-1) {
        perror("iconv_open");
        return 2;
    }

    for (;;) {
        char *ibp = ib;
        int incomplete = 0;
        /* The carried-over tail is only a partial character, so there is
           always room left in ib for the next read. */
        ssize_t bytes_read = read(STDIN_FILENO, ib + iblen, sizeof(ib) - iblen);

        if (bytes_read < 0) {
            if (errno == EINTR)
                continue;
            perror("read");
            return 1;
        }
        if (bytes_read == 0)
            break; /* end of input */
        iblen += (size_t)bytes_read;

        /* Convert everything in ib before reading again.  Resuming right after
           a skipped byte, instead of waiting for the next read, ensures that
           data following an invalid byte is not lost when EOF comes next. */
        while (iblen > 0 && !incomplete) {
            char *obp = ob;
            size_t oblen = sizeof(ob);
            size_t rc = iconv(cd, &ibp, &iblen, &obp, &oblen);
            int err = errno; /* save: write() below may change errno */

            /* Flush whatever was converted, even when iconv() stopped early. */
            if (write_all(STDOUT_FILENO, ob, sizeof(ob) - oblen) != 0) {
                perror("write");
                return 1;
            }
            if (rc != (size_t)-1)
                continue; /* all input consumed; iblen is now 0 */

            switch (err) {
            case EILSEQ: /* Invalid input multibyte sequence */
                /* ibp points at the offending byte. */
                fprintf(stderr, "Invalid multibyte sequence at byte %llu\n",
                        total + (unsigned long long)(ibp - ib) + 1);
                ibp++; /* skip the bad byte and keep converting */
                iblen--;
                break;
            case EINVAL: /* Incomplete input multibyte sequence */
                incomplete = 1; /* needs more input; finish it after next read */
                break;
            case E2BIG: /* Output buffer full: already flushed, just retry */
                break;
            default:
                errno = err;
                perror("iconv");
                return 2;
            }
        }

        /* Everything before ibp has been converted or skipped.  Move the
           remaining iblen bytes (an incomplete sequence, if any) to the
           beginning of ib so the next read appends to them. */
        total += (unsigned long long)(ibp - ib);
        memmove(ib, ibp, iblen);
    }

    if (iblen > 0) {
        /* Input ended in the middle of a multibyte character; it is dropped. */
        fprintf(stderr, "Incomplete multibyte sequence at byte %llu\n",
                total + 1);
    }
    return iconv_close(cd);
}
Any recent syslog daemon (such as syslog-ng or rsyslog) supports filtering. Just edit your `rsyslog.conf` or `syslog-ng.conf` to ignore entries for the process name `/USR/SBIN/CRON` which contain the string `CMD`.
Even better: log these messages to a separate file (e.g. `cron-detail.log`).
Documentation links:
Best Answer
I would guess sed might still create the temp file, but the following might do what you want. (Running it under strace will show you whether sed creates a temp file or not.)
sed -i '/bar/!d' file
The exclamation mark inverts the match and d means delete, so this removes all lines that don't have bar in them.