# html2txt.sed -- crude HTML-to-plain-text filter.
#
# Usage: sed -f html2txt.sed input.html > output.txt
#
# Works line by line: deletes the document head and HTML comments, turns
# inline emphasis into visible ASCII markers, labels headings, and replaces
# list / table / paragraph tags with separators or newlines.
#
# Only the lower-case spelling of most tags is recognised; tags carrying
# attributes are not matched by these patterns.
#
# NOTE(review): this copy of the script was reconstructed from a version in
# which the literal opening tags inside the patterns had been stripped.
# Each restored tag was inferred from its matching closing tag or from its
# replacement text -- confirm against the upstream file.
#
# Copyright (C) 1999  Sandy Harris.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# RCSID $Id: html2txt.sed,v 1.1 2004/03/15 20:35:24 as Exp $

# skip over header material
/<head>/,/<\/head>/d
/<HEAD>/,/<\/HEAD>/d
# A line holding only the body tag is dropped outright.  The anchors used to
# sit inside the angle brackets (/<^body$>/), which can never match real HTML.
/^<body>$/d
s/<body>//

# eliminate possible DOS crud
# (\015 is intended as an octal carriage return; how this escape is
# interpreted depends on the sed implementation -- TODO confirm)
s/\015//

# get rid of HTML comments: one-line comments first, then multi-line ranges
s/<!--.*-->//
/<!--/,/-->/d

# citations & emphasis -> visible
s/<cite>/"/g
s/<\/cite>/"/g
s/<em>/*/g
s/<\/em>/*/g
s/<strong>/!->/g
s/<\/strong>/<-!/g
s/<b>//g
s/<\/b>//g
s/<blockquote>/Quote -->/
s/<\/blockquote>/<-- End Quote/

# mark headers
s/<h1>/Header 1: /
s/<h2>/Header 2: /
s/<h3>/Header 3: /
s/<h4>/Header 4: /
s/<h5>/Header 5: /
s/<h6>/Header 6: /

# remove some cruft
s/<\/h[1-6]>//
/^$/d
# NOTE(review): one further substitution followed here whose pattern could
# not be recovered; it is omitted rather than guessed.

# definition lists
s/<dl>//
s/<\/dl>//
s/^<dt>$/-----------------------------------------/
s/^<dt>/-----------------------------------------\
/
s/<dd>/\
/

# other types of lists
s/<li>//
s/<ol>//
s/<ul>//
s/<\/ol>//
s/<\/ul>//

# tables
s/<table>//
s/<\/table>//
s/<tr>//
s/<td>/ /g

# line break and paragraph markers
# different subst depending where they are in line
s/^<br>//
s/<br>$//
s/<br>/\
/
s/^<p>$//
s/<p>$/\
/
s/^<p>/\
/
s/<p>/\
\
/
s/<\/p>//

# remove more cruft
s/<pre>//
s/<\/pre>//
s/<\/body>//
# The closing '>' was missing from this pattern, leaving a stray '>' behind.
s/<\/html>//
s/<\/BODY>//
s/<\/HTML>//