On Mon, 11 Nov 2019 10:43:35 +1100, faeychild wrote:
On 11/11/19 9:33 am, Bit Twister wrote:
I have a folder full of manuals- disorganised of course
I can suggest getting the router manual onto the system, then,
as root, run /usr/bin/updatedb.
Then run the create_index.pl script to get an index file of the document
files on your system. Then, in your user account, test it:
firefox /tmp/index.html
-----8<-----8<-----8<--cut below this line ---8<-----8<-----8<-----8<
#!/usr/bin/perl -w
#use warning ;
use strict ;
use diagnostics ;
#*****************************************************************************
# Program: create_index.pl - create index.html of htm*/text files on disk.
#
# Purpose: Quick kludge to read a list of html/text files and create
# an index.html list containing the title of each html file opened.
#
# PDF and text files are not parsed for title.
#
#
#
# http://groups.google.com/group/alt.os.linux.mandriva/msg/0dafddb10b874257
#
#
# Assumptions:
# User or cron has executed updatedb
#
# Install: Save script as create_index.pl
# chmod +x create_index.pl
#
# To run: ./create_index.pl
#
# Output: /tmp/index.html
#
# Note: Set $testing non-zero to leave the work files in place and to
# parse only the hard-coded debug file names instead of the locate
# output. You'll find them just after the "locate -i" list.
#
#*****************************************************************************
#***************************************************
#*
#* File-scope state shared by the main line and by
#* fetch_title / fetch_title_end.
#*
#***************************************************
my ($input_line) = "" ; # line to scan for title
my ($lc_line) = "" ; # lower cased input_line
my ($len) = 0 ; # length of title string
my ($line_num) = 0 ; # line number
my ($list_fn) = "/tmp/index.list" ; # list of file names to parse
my ($out_fn) = "/tmp/index.html" ; # output index.html file
my ($sample) = 20 ; # sample lines before giving up
my ($sort) = "_.sort" ; # temp sort fn extension
my ($sys_fn) = "/tmp/index.bash" ; # script filename to generate list to parse
my ($testing) = 0 ; # testing flag 0=not 1=testing
my ($title) = "" ; # title string
my ($tmp_fn) = "" ; # *.htm* file name to scan
my ($t_start) = 0 ; # <title start column
my ($t_stop) = 0 ; # </title title stop column
# Forward declarations so the subs can be called before their definitions.
sub fetch_title ; # get title and save it to $out_fn
sub fetch_title_end ; # find </title if not on same line
#
#***************************************************
#*
#* create the list of *htm* filenames to parse
#*
#***************************************************
# Create the helper bash script and write its preamble in one go.
# The SYS handle stays open; the statements that follow append the
# locate commands to it.
open (SYS, ">", $sys_fn) or die "Opening $sys_fn $!\n" ;
print SYS join ("",
    "#!/bin/bash\n",
    "_exe=\$0\n",
    "_app=\$(basename \$_exe)\n",
    "tput clear\n",
    "echo Executing locate from $sys_fn script\n",
    "echo \$_app assumes locate database is current\n",
    "echo Can take 2 minutes or more depending \n",
    "echo on hardware and loaded documentation.\n",
    "mkdir --parents --verbose /local/doc/\n",
    "touch /local/doc/index.html\n",
    "touch $list_fn\n",
    # Seed the list with a comment line; the parser skips lines starting with '#'.
    "echo \"# Output from create_index.pl \" > $list_fn\n",
) ;
#***********************************************************
#*
#* The following fetches the oddball docs which do not have a
#* index.htm*. Downside is they also collect pages which
#* are called from index.htm* pages. Upside is you get to
#* see titles of chapters of documents you may not have checked.
#*
#***********************************************************
# Shell pipelines whose output is appended to $list_fn by the generated
# script.  They are kept single-quoted so the text below is exactly what
# bash sees (no Perl escape processing), and each pipeline stays on one
# line so no shell line-continuation is needed.
my @locate_cmds = (
    'locate -i afterstep.htm',
    'locate -i share/doc/ | grep -i "\.txt" | grep -v "\.p"',
    'locate -i share/doc/ | grep -i "\.howto"',
    'locate -i FaxMail.html',
    'locate -i reference.htm',
    'locate -i doc/Maelstrom',
    'locate -i doc/Mesa',
    'locate -i doc/TiMidity',
    'locate -i doc/bind | grep -i "\.htm"',
    'locate -i doc/cdialog | grep -i samples',
    'locate -i doc/chbg | grep -i sample',
    'locate -i doc/dhcp-client | grep -i "\.conf"',
    'locate -i doc/dhcp-common | grep -i "\.conf"',
    'locate -i doc/ed- | grep -i posix',
    'locate -i doc/esound- | grep -i tips',
    'locate -i doc/fetchmail | grep -i "\.html"',
    'locate -i doc/fontconfig | grep -i "\.html"',
    'locate -i doc/foomatic-db-engine | grep -i usage',
    'locate -i doc/foomatic-filters | grep -i usage',
    'locate -i doc/freeciv-data | grep -i howtoplay',
    'locate -i doc/freetype- | grep -i "\.htm"',
    'locate -i doc/gettext- | grep -i "AllNames\.html"',
    'locate -i doc/gocr | grep -i gocr.html',
    'locate -i doc/gv- | grep -i gv.html',
    'locate -i doc/libfreetype | grep -i step | grep "\.htm"',
    'locate -i ghttp.html',
    'locate -i glib.html',
    'locate -i glib_toc.html',
    'locate -i gtk_tut.html',
    'locate -i doc/libphp_common | grep -i php.',
    'locate -i doc/libpng | grep -i libpng.txt',
    'locate -i doc/libpng | grep -i example.c',
    'locate -i doc/libxclass | grep -i .notes',
    'locate -i doc/lilo-doc| grep -i QuickInst',
    'locate -i doc/methane | grep -i info.htm',
    'locate -i doc/mod_perl-common | grep -i mod_perl | grep -i "\.htm"',
    'locate -i doc/mt-st | grep -i stinit.def.examples',
    'locate -i doc/nmap | grep -i nmap_doc.html',
    'locate -i doc/perl | grep -i cgi_docs.html',
    'locate -i doc/perl-CGI | grep -i cgi_docs.html',
    'locate -i doc/perl-Expect | grep -i kibitz',
    'locate -i doc/perl-Expect | grep -i tutorial',
    'locate -i doc/perl-GTK-Gnome | grep -i test.pl',
    'locate -i doc/perl-Gtk2 | grep -i examples',
    'locate -i doc/perl-Gtk2- | grep -i gtk-demo | grep -i "\.pl"',
    'locate -i doc/perl-Net | grep -i "\.pl"',
    'locate -i doc/perl-SDL | grep -i "\.pl"',
    'locate -i sgmlspm.html',
    'locate -i sgmlspl.html',
    'locate -i doc/playmidi- | grep -i QuickStart',
    'locate -i doc/postgresql-python | grep -i /tutorial',
    'locate -i doc/ppp | grep -i /sample',
    'locate -i doc/ppp | grep -i /scripts',
    'locate -i doc/procmail | grep -i /examples',
    'locate -i doc/proftpd | grep -i Configuration.html',
    'locate -i pwdb.html',
    'locate -i doc/pygtk | grep -i "\.py"',
    'locate -i doc/python-imaging | grep -i "\.py"',
    'locate -i doc/rsync | grep -i rsync.html',
    'locate -i doc/rsync | grep -i rsyncd.conf.html',
    'locate -i doc/rxvt | grep -i /rxvtRef.html',
    'locate -i doc/rxvt | grep -i rxvt.html',
    'locate -i devguide.html',
    'locate -i slangdoc.html',
    'locate -i cref.html',
    'locate -i slang.html',
    'locate -i slrn-doc.html',
    'locate -i doc/spamassassin | grep -i procmailrc.example',
    'locate -i Howto-spec-helper',
    'locate -i sample.sudoers',
    'locate -i doc/xchat- | grep -i plugin | grep -i .html',
    'locate -i xfig_man.html',
    'locate -i xfig.html',
    'locate -i bash.htm',
    'locate -i bashref.htm',
    'locate -i grip.htm',
    'locate -i book1.htm',
    'locate -i clisp.htm',
    'locate -i changes.htm',
    'locate -i doc.htm',
    'locate -i docs.htm',
    'locate -i faq.htm',
    'locate -i glyphs.htm',
    'locate -i gperf.html',
    'locate -i guide.htm',
    'locate -i gv.htm',
    'locate -i howto | grep -i .htm',
    'locate -i hreads.htm',
    'locate -i ical.htm',
    'locate -i impnotes.htm',
    'locate -i intro.htm',
    'locate -i icewm.htm',
    'locate -i jikes.htm',
    'locate -i kikbd.htm',
    'locate -i less.htm',
    'locate -i magick.html',
    'locate -i man.htm',
    'locate -i manual.htm',
    'locate -i ntpd.htm',
    'locate -i pam.htm',
    'locate -i part1.htm',
    'locate -i plot.htm',
    'locate -i ppp.htm',
    'locate -i primer.htm',
    'locate -i pwdb.htm',
    'locate -i readme.htm',
    'locate -i setup.htm',
    'locate -i themes.htm',
    'locate -i threads.htm',
    'locate -i status.htm',
    'locate -i server.htm',
    'locate -i toc.htm',
    'locate -i use.htm',
    'locate -i share/doc/ | grep -i readme | grep -v ".bz"',
    'locate -i share/doc/ | grep -i faq | grep -v "\.gif"'
        . ' | grep -v "\.xm" | grep -v "\.d" | grep -v "\.smgl"'
        . ' | grep -v "\.m" | grep -v "\.r"',
    'locate -i .pdf | grep -v /accounts/chase/ | grep -v /accounts/taxes',
    'locate -i index.htm | grep -v "~" | grep -v "\.doc" | grep -v "\.bz"',
    'locate -i content | grep -i "\.htm" | grep -v "\.x"'
        . ' | grep -v "\.p" | grep -v "\.doc" | grep -v "\.ja" | grep -v "\.ko"',
) ;
for my $locate_cmd (@locate_cmds)
{
    print SYS "$locate_cmd >> $list_fn\n" ;
}
# Drop the privoxy webserver index from the collected list, going through
# the temporary "$list_fn$sort" file.
print SYS "grep -v doc/privoxy/webserver/index.html $list_fn > $list_fn$sort \n" ;
print SYS "/bin/mv $list_fn$sort $list_fn\n" ;
# Normal runs sort and de-duplicate the collected list.
if ( $testing == 0 )
{
    print SYS "sort $list_fn | uniq > $list_fn$sort\n" ;
    print SYS "/bin/mv $list_fn$sort $list_fn\n" ;
}
# Buffered write errors only surface at close, so check it before
# trying to execute the script.
close (SYS) or die "Closing $sys_fn $!\n" ;
if ( $testing == 0 )
{
    # List-form system avoids handing the file name to a shell.
    system ("chmod", "+x", $sys_fn) == 0 # set script executable
        or die "chmod +x $sys_fn failed, status $?\n" ;
    system ($sys_fn) == 0 # generate the htm list
        or warn "$sys_fn exited with status $?\n" ;
}
else
{
    # Testing: skip locate and parse only these hand-picked files.
    open (LIST, ">", $list_fn) or die "Opening $list_fn $!\n" ;
    print LIST "/usr/share/ImageMagick-5.5.7/index.html\n" ;
    print LIST "/usr/share/doc/HTML/en/kdevelop/reference/C/EXAMPLES/README.html\n" ;
    close (LIST) or die "Closing $list_fn $!\n" ;
}
#
#***************************************************
#*
#* Parse list of file name found in $list_fn
#*
#***************************************************
# Read every file name in $list_fn and write one <li> link per entry
# to $out_fn, wrapped in a <ul> ... </ul> list.
open (LIST, "<", $list_fn) or die "Opening $list_fn $!\n" ;
open (OUT, ">", $out_fn) or die "Opening $out_fn $!\n" ;
print OUT "<ul>\n" ; # add start of list html command
print "parsing $list_fn \n" ;
FN: while (defined (my $entry = <LIST>))
{
    chomp $entry ; # kill the line feed char
    $tmp_fn = $entry ; # fetch_title reads the file name from $tmp_fn
    next FN if $tmp_fn eq "" ; # blank line, nothing to link to
    next FN if substr ($tmp_fn, 0, 1) eq "#" ; # comment line in the list
    if ($tmp_fn =~ /\.htm/i)
    {
        # htm* file: use its <title> as the link text
        fetch_title () ;
    }
    else
    {
        # anything else (text, pdf, ...) is listed by file name
        print OUT "<li><A HREF=\"$tmp_fn\">$tmp_fn</A>\n" ;
    }
} # end while (<LIST>)
close LIST ;
print OUT "</ul>\n" ; # add end of list html command
close (OUT) or die "Closing $out_fn $!\n" ;
# Work files are kept when testing so they can be inspected.
if ( $testing == 0 )
{
    for my $work_fn ($list_fn, $sys_fn)
    {
        unlink ($work_fn) or warn "Removing $work_fn $!\n" ;
    }
}
# Re-create $sys_fn as a small bash script that only prints hints on how
# to view the index and where it could be saved, then run it.
open (SYS, ">", $sys_fn) or die "Opening $sys_fn $!\n" ;
print SYS "#!/bin/bash\n" ;
print SYS "_exe=\$0\n" ;
print SYS "_app=\$(basename \$_exe)\n" ;
print SYS "\necho \" \" \n" ;
print SYS "\necho \" \" \n" ;
print SYS "\necho \" \" \n" ;
print SYS "\necho Try: firefox $out_fn\n" ;
print SYS "\necho If you like it, you might want to save it to something like\n" ;
# Use $out_fn rather than a second hard-coded copy of the output path.
print SYS "\necho /bin/mv $out_fn /local/doc/sys_doc.html\n" ;
print SYS "\necho or\n" ;
# The 4th and 7th words of /etc/release (dots turned into underscores)
# become part of the suggested file name.
print SYS "\nset -- \$(cat /etc/release | tr '.' '_')\n" ;
print SYS "\n_ix_fn=/local/doc/sys_doc_\${4}_\${7}.html\n" ;
print SYS "\necho /bin/mv $out_fn \$_ix_fn\n" ;
print SYS "\necho ln -sf \$_ix_fn /local/doc/sys_doc.html\n" ;
close (SYS) or die "Closing $sys_fn $!\n" ;
system ("chmod", "+x", $sys_fn) == 0 # set script executable
    or die "chmod +x $sys_fn failed, status $?\n" ;
system ($sys_fn) == 0 # print the hints
    or warn "$sys_fn exited with status $?\n" ;
#
#***************************************************
#*
#* fetch_title - scan $tmp_fn looking for
#* <title>text string</title
#* or
#* ><title
#* >text string
#* </title
#* title can be upper/lower case.
#*
#***************************************************
sub fetch_title
{
    #***************************************************
    #* Reads : $tmp_fn (file to scan), $sample (line limit)
    #* Writes: exactly one <li> link for $tmp_fn to OUT
    #* Uses  : TMP handle and $input_line, $lc_line, $t_start,
    #*         $t_stop, $line_num, $len, $title as scratch state
    #*         (shared with fetch_title_end).
    #*
    #* Link text is the document title when one is found, the
    #* file name when the title is empty, unterminated or absent,
    #* and "sample <file name>" when no <title was seen before
    #* giving up after $sample lines.
    #***************************************************
    my $link_text ; # undef until we know what to show

    if (! open (TMP, "<", $tmp_fn))
    {
        # The locate database may be stale; keep going with a plain link
        # instead of aborting the whole index.
        warn "Opening $tmp_fn $!\n" ;
        print OUT "<li><A HREF=\"$tmp_fn\">$tmp_fn</A>\n" ;
        return ;
    }
    print "processing ===> $tmp_fn\n" ;
    $t_start = 0 ;
    $t_stop = 0 ;
    $line_num = 0 ;
    LINE: while (defined (my $line = <TMP>))
    {
        chomp $line ; # kill the line feed char
        $line_num++ ;
        next LINE if length ($line) == 0 ; # empty line
        $input_line = $line ; # save a copy for parsing
        $lc_line = lc ($input_line) ; # lower case the line
        $t_start = index ($lc_line, "<title") ;
        if ($t_start >= 0) # now hunt for </title>
        {
            $t_stop = index ($lc_line, "</title" ) ;
            if ($t_stop < 0) # </title not on same line
            {
                # appends following lines to $input_line / $lc_line
                fetch_title_end () ;
            }
            # The title text starts after the '>' that closes the opening
            # tag, which also copes with attributes such as <title lang="en">.
            my $tag_end = index ($lc_line, ">", $t_start) ;
            $title = "" ;
            if ($t_stop >= 0 && $tag_end >= 0 && $tag_end < $t_stop)
            {
                $len = $t_stop - $tag_end - 1 ; # get length of title string
                $title = substr ($input_line, $tag_end + 1, $len) ;
                $title =~ s/^\s+|\s+$//g ; # trim surrounding white space
            }
            $link_text = length ($title) ? $title : $tmp_fn ;
            last LINE ;
        } # end if ($t_start >= 0)
        if ($line_num > $sample )
        {
            $link_text = "sample $tmp_fn" ; # gave up looking for a title
            last LINE ;
        }
    } # end while (<TMP>)
    close TMP ; # close index.htm* file
    # Short file without any <title: still list it, by file name.
    $link_text = $tmp_fn if ! defined $link_text ;
    print OUT "<li><A HREF=\"$tmp_fn\">$link_text</A>\n" ;
    $t_start = 0 ;
    $t_stop = 0 ;
    return ;
} # end fetch_title
#
#***************************************************
#*
#* fetch_title_end - scan for </title on separate line.
#*
#* $input_line will be reassembled into
#* <title>text string</title
#* $t_stop will contain column number for </title
#*
#***************************************************
sub fetch_title_end
{
    # Keep pulling lines from TMP and gluing them (without any separator)
    # onto $input_line until the lower-cased text contains "</title" or
    # the file runs out.  On return $lc_line mirrors $input_line and
    # $t_stop holds the column of "</title", or -1 if it was never found.
    while (defined (my $more = <TMP>))
    {
        chomp $more ; # kill the line feed char
        $input_line .= $more ; # append line to last line read
        $lc_line = lc $input_line ; # lower case the joined text
        $t_stop = index ($lc_line, "</title") ;
        return if $t_stop >= 0 ; # found </title
    } # end while (<TMP>)
} # end fetch_title_end
#*************** end create_index.pl ****************************************
--- MBSE BBS v1.0.7.12A (GNU/Linux-x86_64)
* Origin: A noiseless patient Spider (2:250/1@fidonet)