summaryrefslogtreecommitdiff
path: root/doc/utils/html2four.c
diff options
context:
space:
mode:
Diffstat (limited to 'doc/utils/html2four.c')
-rw-r--r--doc/utils/html2four.c298
1 files changed, 0 insertions, 298 deletions
diff --git a/doc/utils/html2four.c b/doc/utils/html2four.c
deleted file mode 100644
index fc1100d01..000000000
--- a/doc/utils/html2four.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- extract headers from HTML files
- in format suitable for turning into permuted index
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-/*
- maximum sizes for input line and for name in <a> tag
-*/
-#define MAX_LINE 512
-#define MAX_NAME 64
-
-/*
- functions
- all return 0 for OK, 1 for errors
-*/
-int do_file( char *, FILE * ) ;
-int parse_line( char * ) ;
-int print_line( char *, char *) ;
-int print_header_problem( char * ) ;
-int sanity() ;
-
-void die( char * ) ;
-
-char *prog_name ;
-int max_level ;
-char *current_file ;
-
-int main(int argc, char* argv[])
-{
- char *p ;
- int temp, done, status ;
- FILE *fp ;
-
- prog_name = *argv ;
- argc--,argv++ ;
-
- max_level = 9 ;
- if(argc && *argv ) {
- p = *argv ;
- if( p[0] == '-' ) {
- if( isdigit(p[1]) && p[2] == '\0' ) {
- max_level = p[1] - 0 ;
- argc-- ;
- argv++ ;
- }
- else die("unknown option") ;
- } }
-
- status = done = 0 ;
- if( argc == 0) {
- if( (status = do_file("STDIN", stdin)) == 0 )
- done++ ;
- }
- else {
-/*
- printf("ARGC = %d\n", argc ) ;
-*/
- while( argc-- ) {
- p = *argv++ ;
-/*
- printf("ARGV P %s %s\n", *argv, p) ;
-*/
- if( p == NULL ) {
- fprintf(stderr, "%s: null filename pointer\n", prog_name) ;
- status++ ;
- }
- else if( (fp = fopen(p,"r")) == NULL ) {
- fprintf(stderr, "%s: cannot open file %s\n", prog_name, p) ;
- status++ ;
- }
- else {
- if( (temp = do_file(p, fp)) != 0 )
- status++ ;
- done++ ;
- fclose(fp) ;
- }
- fflush(stderr) ;
- fflush(stdout) ;
- }
- }
-/*
- printf("%s: %d files processed, %d with errors\n", prog_name, done, status) ;
-*/
- return( status ? 1 : 0 ) ;
-}
-
-void die( char *message )
-{
- fflush(stdout) ;
- fprintf(stderr, "%s: %s\n", prog_name, message) ;
- exit(1) ;
-}
-
-int header_flags[10] ;
-int in_header ;
-
-char buffer[MAX_LINE+1] ;
-char label[MAX_NAME+1] ;
-
-int do_file( char *file, FILE *fp )
-{
- int i, status, x, y ;
- char *base, *p ;
-
- status = 0 ;
- in_header = 0 ;
- label[0] = '\0' ;
- for( i = 0 ; i < 10 ; i++ )
- header_flags[i] = 0 ;
- current_file = file ;
-
- while( base = fgets(buffer, MAX_LINE, fp) ) {
- // count < and > characters in line
- for( x = y = 0, p = base ; *p ; p++ )
- switch( *p ) {
- case '<':
- x++ ;
- break ;
- case '>':
- y++ ;
- break ;
- default:
- break ;
- }
- // skip line if no < or >
- if( x == 0 && y == 0 )
- continue ;
- // report error for unequal count
- else if( x != y ) {
- if( strncmp( base, "<!--", 4) && strncmp(base, "-->", 3) ) {
- fflush(stdout) ;
- fprintf(stderr, "%s in file %s: unequal < > counts %d %d\n",
- prog_name, file, x, y ) ;
- fprintf(stderr, "%s: %s\n", prog_name, base) ;
- fflush(stderr) ;
- status = 1 ;
- }
- continue ;
- }
- // parse lines containing tags
- else
- if( parse_line(base) )
- status = 1 ;
- // check that header labelling is sane
- for( i = x = y = 0 ; i < 10 ; i++ ) {
- // count non-zero entries
- if( x = header_flags[i] )
- y++ ;
- // should be in 0 or 1 headers at a time
- if( x > 1 || x < 0 )
- status = 1 ;
- }
- if( y > 1 )
- status = 1 ;
- }
- return status ;
-}
-
-int parse_line( char *data )
-{
- char *p, *q, *end ;
- int x ;
-
- // set end pointer
- for( end = data ; *end ; end++ )
- ;
- // trim off trailing returns or newlines
- for( p = end - 1, q = end ; q > data ; p--,q-- ) {
- switch( *p ) {
- case '\012':
- case '\015':
- *p = '\0' ;
- continue ;
- default:
- break ; // out of switch()
- }
- break ; // out of for()
- }
- end = q ;
- p = data ;
- while( p < end ) {
- // find tag delimiters
- if( *p == '<') {
- for( q = p + 1 ; *q ; q++ )
- if( *q == '<' || *q == '>' )
- break ;
- // if we find another '<'
- // restart tag search from it
- if( *q == '<' ) {
- p = q ;
- continue ;
- }
- // "<>" is not interesting
- if( q == p + 1 ) {
- fflush(stdout) ;
- fprintf(stderr, "%s: null tag\n", prog_name) ;
- fprintf(stderr, "%s: line\n", prog_name, data) ;
- fflush(stderr) ;
- p = q + 1 ;
- continue ;
- }
- // ignore delimiters once found
- *q = '\0' ;
- p++ ;
- // p points to tag contents, null terminated
- switch( *p ) {
- // save contents of <a name= > tags
- case 'a' :
- case 'A' :
- if( p[1] == ' ' &&
- (p[2] == 'n' || p[2] == 'N') &&
- (p[3] == 'a' || p[3] == 'A') &&
- (p[4] == 'm' || p[4] == 'M') &&
- (p[5] == 'e' || p[5] == 'E') &&
- p[6] == '=' )
- strncpy(label, p + 7, MAX_NAME) ;
- break ;
- case 'b' :
- case 'B' :
- if( in_header && strlen(p) == 2 &&
- (p[1] == 'r' || p[1] == 'R') )
- putchar(' ') ;
- break ;
- // header tags
- case 'h' :
- case 'H' :
- if( strlen(p) == 2 && isdigit(p[1]) ) {
- if( in_header )
- fprintf(stderr, "%s: bad header nesting in %s\n",
- prog_name, current_file) ;
- x = p[1] - '0' ;
- in_header = 1 ;
- header_flags[x]++ ;
- printf("%s\t%s\tH%d\t", current_file, label, x) ;
- }
- break ;
- // only care about end-of-header
- case '/':
- p++ ;
- switch( *p ) {
- case 'h' :
- case 'H' :
- if( strlen(p) == 2 && isdigit(p[1]) ) {
- if( ! in_header )
- fprintf(stderr, "%s: bad header nesting in %s\n",
- prog_name, current_file) ;
- x = p[1] - '0' ;
- in_header = 0 ;
- header_flags[x]-- ;
- printf("\n") ;
- }
- break ;
- }
- break ;
- // uninteresting tag, look for next
- default :
- break ;
- }
- // tag done, point p beyond it
- p = q + 1 ;
- }
- else if( in_header ) {
- if( isprint(*p) && *p != '\n' )
- putchar(*p) ;
- else
- putchar(' ');
- p++ ;
- }
- else
- p++ ;
- }
- return(0) ;
-}
-
-int print_line( char *tag, char *text)
-{
- printf("%%s\ts\t%s\t%s\t\n", current_file, label, tag, text) ;
- return 0 ;
-}
-
-int print_header_problem( char *file )
-{
- int i ;
- fflush(stdout) ;
- fprintf(stderr, "%s: HEADER TAG PROBLEM in file %s\n", prog_name, file) ;
- fprintf(stderr, "%s: counts", prog_name) ;
- for ( i = 0 ; i < 10 ; i++ )
- fprintf(stderr, "\t%d", i) ;
- fprintf(stderr,"\n") ;
- fflush(stderr) ;
- return(0) ;
-}
-