#!/usr/bin/perl -w
# - - - - - - - - - - - - - - - - - - -
#  AntiFAQ
# - - - - - - - - - - - - - - - - - - -
#  Program: AntiFAQ
#  Version: 0.5.2---->X
#  Author:  Andreas Warnke
#  License: GPL
# - - - - - - - - - - - - - - - - - - -
#  ABSTRACT
# - - - - - - - - - - - - - - - - - - -
#  AntiFAQ builds an index for a given document folder.
#  Html files are generated that link all documents together.
# - - - - - - - - - - - - - - - - - - -
#  HOWTO / README
# - - - - - - - - - - - - - - - - - - -
#  1.) Copy the program AntiFAQ.pl to the root folder of your
#      document tree.
#  2.) Open a terminal and change into your doc-root directory.
#  3.) Call "./AntiFAQ.pl". Now the index.html files are generated.
#  4.) When finished, view your complete document tree in a browser!
#  Note 1) Before modifying your folder structure, you should
#          call "./UnAntiFAQ.sh" to remove the AntiFAQ-index files.
#  Note 2) Use this program at your own risk! No Warranty.
# - - - - - - - - - - - - - - - - - - -
#  HISTORY
# - - - - - - - - - - - - - - - - - - -
#  2003-12-15 Version 0.5.2 Decision if inlining text is based on
#                           filename extension instead of -T (isInlineFile)
#                           showDirContents updated.
#  2003-12-07 Version 0.5.1 Display of file/folder sizes changed
#  2003-12-06 Version 0.5   The undo-file is written immediately;
#                           getLinkFile() generates unique names
#                           improving parameters of html functions
#                           fix for target frames of links
#  2003-11-09 Version 0.4.4 layout fixes,
#                           hexencode encodes exactly 2 digits
#  2003-11-06 Version 0.4.3 directory cache, nicer layout
#  2003-11-04 Version 0.4.2 calculating bytes and files of folders,
#                           changes to abstract generation
#  2003-11-03 Version 0.4.1 better recognition of text files
#  2003-08-17 Version 0.4   Abstract for Directory view.
#  2003-08-14 Version 0.3.3 Inlining text files
#  2003-08-14 Version 0.3.2 Code Review, Bugfixes in encodings
#  2003-08-03 Version 0.3.1 Bugfixes in Undofile
#  2003-07-27 Version 0.3   Restructuring
#  2003-07-23 Version 0.2   Bug Fixes
#  2003-07-19 Version 0.1   Initial version
# - - - - - - - - - - - - - - - - - - -
#  TODO
# - - - - - - - - - - - - - - - - - - -
#  Fileformat: Unicode files are not recognized
#  Fileformat: real thumbnails are missing
#  Layout: width problems in left table column when inlining texts
#  Layout: inline pages don't highlight the selected file in the menu
#  Layout: the download link of inline pages is too far on the right side
#  Layout: named Targets in left frame to avoid scrolling
#  User Interface: progressbar during index generation
#  Bug: print() on closed filehandle
#       if directory tree too deep or pathname too long (?)
#       (mitigated: every open is checked now; a file that cannot be
#        opened is reported on STDERR and skipped)
# - - - - - - - - - - - - - - - - - - -
#  REVIEW NOTES
# - - - - - - - - - - - - - - - - - - -
#  NOTE(review): the listing this file was restored from had lost its
#  line breaks and every "<...>" sequence. The readline operators and the
#  entity table of htmlencode() could be restored unambiguously. The HTML
#  markup inside the string literals and here-documents, and the
#  expressions that build the link targets, are a RECONSTRUCTION that
#  follows the visible remains of the code - TODO confirm against an
#  original copy of AntiFAQ.pl.

use strict;
use warnings;

# - - - - - - - - - - - - - - - - - - -
#  declarations
# - - - - - - - - - - - - - - - - - - -

# Undo:
my $UndoFilename = "./UnAntiFAQ.sh";    # name of the undo file

# cache statistics:
my %path_bytes = ();    # count the bytes for a given path
my %path_files = ();    # count the files for a given path

# Directory Caches:
my %subfoldercache = ();    # get a list of folders for a given path
my %fileentrycache = ();    # get a list of files for a given path

# hash to link generated files to their originals:
my %indexForFile = ();    # determine generated file for a given original file and link type
my %fileForIndex = ();    # determine original file and link type for a given generated file

# html parts:
my $menu_head = <<'EOF';    # html sequence starting the tree
<b>AntiFAQ</b><br>
EOF
my $menu_tail = <<'EOF';    # html sequence ending the tree
. . . .<br>
EOF

# - - - - - - - - - - - - - - - - - - -
#  main
# - - - - - - - - - - - - - - - - - - -

# remove all generated files and create an empty undo file:
undo();
createUndo();

# calculate hash of all directory sizes and filecounts
calcStatistics('.');

# create the indices:
indexRec('.');

# finish the undo file (the undo script removes itself last):
addUndo($UndoFilename);

# - - - - - - - - - - - - - - - - - - -
#  Index Generation
# - - - - - - - - - - - - - - - - - - -

sub indexRec {
    # This function determines which index files need to
    # be generated and calls the appropriate html-generation
    # functions; it then recurses into all subfolders.
    # params:
    #   $path: folder where index files shall be generated
    my ($path) = @_;

    # don't create an index if one already exists:
    return if ( -e "$path/index.html" );

    # get files and folders:
    my %entries     = readAndFilterFolder($path);
    my @subfolders  = @{ $entries{subfolders} };
    my @fileentries = @{ $entries{fileentries} };

    # create index file
    print " createIndexFile ('$path');\n";
    createIndexFile( $path, 'index.html' );
    my $menuFrame = getLinkFile( 'Menu', $path, 'menu' );
    print " createFrameIndex ('$path', '$menuFrame');\n";
    createFrameIndex( $path, $menuFrame );

    # create frames; everything that is not shown in a frame (return
    # value 0 of isInlineFile) gets a frameless inline/download page
    foreach my $unframed (@fileentries) {
        my $framefile = getLinkFile( 'Main', $path, $unframed );
        if ( isInlineFile("$path/$unframed") != 0 ) {
            print " createInlinePage('$path', '$framefile', '$unframed');\n";
            createInlinePage( $path, $framefile, $unframed );
        }
        else {
            print " createFrame('$path', '$framefile', '$unframed');\n";
            createFrame( $path, $framefile, $unframed );
        }
    }

    # create index file for subfolders
    foreach my $folder (@subfolders) {
        indexRec("$path/$folder");
    }
}

# - - - - - - - - - - - - - - - - - - -
#  HTML Functions
# - - - - - - - - - - - - - - - - - - -

sub openIndexHandle {
    # opens the global output handle IDX for writing.
    # params:
    #   $file: pathname of the html file to be generated
    # returns 1 on success; on failure a warning is printed and 0 is
    # returned, so the caller can skip the file instead of printing
    # to a closed filehandle.
    my ($file) = @_;
    return 1 if open( IDX, '>', $file );
    warn "AntiFAQ: cannot write '$file': $!\n";
    return 0;
}

sub createFrame {
    # creates a frameset;
    # the left frame is linked to the menu-html file,
    # the right frame is linked to the original doc-file
    # params:
    #   $path      folder where destfile exists and framefile is generated
    #   $framefile name of the file to be generated
    #   $destfile  link-destination for the right frame
    my ( $path, $framefile, $destfile ) = @_;
    addUndo("$path/$framefile");
    openIndexHandle("$path/$framefile") or return;

    # same name that indexRec passes to createFrameIndex (cached lookup)
    my $menufile = getLinkFile( 'Menu', $path, 'menu' );

    print IDX <<'EOF';
<html>
<head>
<title>
EOF
    print IDX htmlencode($destfile);
    print IDX <<'EOF';
</title>
</head>
<frameset cols="30%,70%">
EOF
    print IDX " <frame src=\"" . urlencode($menufile) . "\" name=\"menu\">\n";
    print IDX " <frame src=\"" . urlencode($destfile) . "\" name=\"main\">\n";
    print IDX <<'EOF';
</frameset>
</html>
EOF
    close IDX;
}

sub createIndexFile {
    # Creates an index file for a folder:
    # the left table column shows the file-system structure,
    # the right table column shows the contents of the folder
    # params:
    #   $path:     folder for which to generate an index file
    #   $indexfile is always 'index.html'
    my ( $path, $indexfile ) = @_;
    addUndo("$path/$indexfile");
    openIndexHandle("$path/$indexfile") or return;

    print IDX <<'EOF';
<html>
<head>
<title>
EOF
    print IDX htmlencode($path);
    print IDX <<'EOF';
</title>
</head>
<body>
<table>
<tr>
<td valign="top">
EOF
    print IDX $menu_head;
    printDir( "$path", '.', "$path", '', 0, 0 );
    print IDX $menu_tail;
    print IDX <<'EOF';
</td>
<td valign="top">
EOF
    print IDX '<h2>' . htmlencode($path) . "</h2>\n";
    showDirContents($path);
    print IDX <<'EOF';
</td>
</tr>
</table>
</body>
</html>
EOF
    close IDX;
}

sub createInlinePage {
    # Creates a frameless page for a single document:
    # the left table column shows the file-system structure,
    # the right table column shows a download link and - if
    # isInlineFile() returns 1 - the html-encoded text of the document
    # params:
    #   $path:     directory where to create the inline page
    #   $htmlfile: filename of the generated file
    #   $destfile: filename of the original file
    my ( $path, $htmlfile, $destfile ) = @_;
    addUndo("$path/$htmlfile");
    openIndexHandle("$path/$htmlfile") or return;

    print IDX <<'EOF';
<html>
<head>
<title>
EOF
    print IDX htmlencode($destfile);
    print IDX <<'EOF';
</title>
</head>
<body>
<table>
<tr>
<td valign="top">
EOF
    print IDX $menu_head;
    printDir( "$path", '.', "$path", '', 1, 0 );
    print IDX $menu_tail;
    print IDX <<'EOF';
</td>
<td valign="top">
EOF
    print IDX '<p>' . "\n";
    print IDX ' <a href="' . urlencode($destfile) . '">Download '
        . htmlencode($destfile) . "</a>\n";
    print IDX '</p>' . "\n";
    if ( isInlineFile("$path/$destfile") == 1 ) {
        print IDX '<pre>';
        if ( open( INLINE, '<', "$path/$destfile" ) ) {
            while ( defined( my $line = <INLINE> ) ) {
                print IDX htmlencode($line);
            }
            close INLINE;
        }
        else {
            warn "AntiFAQ: cannot read '$path/$destfile': $!\n";
        }
        print IDX '</pre>' . "\n";
    }
    print IDX '</td>' . "\n";
    print IDX <<'EOF';
</tr>
</table>
</body>
</html>
EOF
    close IDX;
}

sub createFrameIndex {
    # creates an html file displaying the file system structure.
    # this file is displayed in the left frame of a frameset.
    # params:
    #   $path:      directory where to create the menu frame
    #   $indexFile: filename of the html-menu
    my ( $path, $indexFile ) = @_;
    addUndo("$path/$indexFile");
    openIndexHandle("$path/$indexFile") or return;

    print IDX <<'EOF';
<html>
<head>
<title>
EOF
    print IDX htmlencode($path);
    print IDX <<'EOF';
</title>
</head>
<body>
EOF
    print IDX $menu_head;

    # links of the menu frame must replace the whole frameset
    printDir( "$path", '.', "$path", 'target="_parent"', 1, 0 );
    print IDX $menu_tail;
    print IDX <<'EOF';
</body>
</html>
EOF
    close IDX;
}

sub printDir {
    # displays the file system structure
    # param:
    #   IDX            global file handle for output
    #   $docpath:      folder where the generated html file is stored
    #   $rootpath:     root folder of the file system part that shall be displayed
    #   $openpath:     path that shall be expanded
    #   $targettag:    either 'target="_parent"' or empty
    #   $showfiles:    true if files in the $openpath shall be displayed
    #   $maxdepth:     maximum expansion depth - except for the open path
    #   $currentdepth: recursion depth; should be omitted by external calls
    my ( $docpath, $rootpath, $openpath, $targettag, $showfiles,
        $maxdepth, $currentdepth )
        = @_;
    $currentdepth = 0 unless defined $currentdepth;

    # recursion end: too deep AND $rootpath is not on the way to
    # $openpath (the relative path would have to climb up with "..")
    return
        if ( ( $currentdepth > $maxdepth )
        && ( relPath( $rootpath, $openpath ) =~ m/^\.\./ ) );

    # get files and folders:
    my %entries     = readAndFilterFolder($rootpath);
    my @subfolders  = @{ $entries{subfolders} };
    my @fileentries = @{ $entries{fileentries} };

    # space according to recursion depth:
    my $indent = '&nbsp;&nbsp;&nbsp;&nbsp;' x $currentdepth;
    my $attrs = ( defined $targettag && $targettag ne '' ) ? " $targettag" : '';

    # now print the files:
    if ( $showfiles && ( $openpath eq $rootpath ) ) {
        foreach my $file (@fileentries) {
            # The generated page of a file lives next to the file, i.e.
            # in $rootpath. Looking it up under $docpath (as before)
            # registered bogus names whenever showDirContents previews
            # a subfolder, where $docpath differs from $rootpath.
            my $linkfile = getLinkFile( 'Main', $rootpath, $file );
            my $href = relPath( $docpath, "$rootpath/$linkfile" );
            print IDX $indent . "-&nbsp;&nbsp;";
            print IDX '<a href="' . urlencode($href) . '"' . $attrs . '>'
                . htmlencode($file) . "</a>";
            print IDX "<br>\n";
        }
    }

    # now print the subdirectories:
    foreach my $folder (@subfolders) {
        my $href = relPath( $docpath, "$rootpath/$folder/index.html" );
        print IDX $indent . "+&nbsp;&nbsp;";

        # print the foldername; the currently open folder is emphasized:
        print IDX '<a href="' . urlencode($href) . '"' . $attrs . ">";
        my $strong = ( "$rootpath/$folder" eq $openpath );
        if ($strong) { print IDX "<strong>"; }
        print IDX htmlencode($folder);
        if ($strong) { print IDX "</strong>"; }
        print IDX '</a>';
        print IDX "<br>\n";

        # recursion:
        printDir( $docpath, "$rootpath/$folder", $openpath, $targettag,
            $showfiles, $maxdepth, ( $currentdepth + 1 ) );
    }
}

sub showDirContents {
    # displays the contents of a folder as a table with two cells per
    # table row; every file and every subfolder gets one cell.
    # param:
    #   IDX       global file handle where to write to
    #   $dirpath: folder to display
    my ($dirpath) = @_;
    print "showDirContents('$dirpath') called\n";

    # get files and folders:
    my %entries     = readAndFilterFolder($dirpath);
    my @subfolders  = @{ $entries{subfolders} };
    my @fileentries = @{ $entries{fileentries} };

    # start the table:
    # NOTE(review): $rowid counts the cells of the current table row
    # (1..2) and $colid counts the table rows - the names are kept
    # from the original.
    my $colid = 1;
    my $rowid = 0;
    print IDX "<table width=\"100%\" cellpadding=\"4\">\n <tr>\n";

    # now print the files:
    foreach my $file (@fileentries) {
        my $linkfile = getLinkFile( 'Main', $dirpath, $file );
        $rowid++;
        if ( $rowid > 2 ) { $rowid = 1; $colid++; print IDX " </tr><tr>\n"; }
        print IDX <<'EOF';
  <td valign="top" width="50%">
EOF
        print IDX ' <a href="' . urlencode($linkfile) . '">'
            . htmlencode($file) . '</a>&nbsp;&nbsp;'
            . displayStats("$dirpath/$file");
        print IDX "<br>\n";

        # print file/folder content:
        if ( $file =~ m/\.(gif|jpeg|jpg|png)$/i ) {
            # no real thumbnail, the browser scales the image (see TODO)
            print IDX '<img src="' . urlencode($file) . '" height="96" alt="'
                . htmlencode($file) . '">' . "\n";
        }
        elsif ( isInlineFile("$dirpath/$file") != -1 ) {
            # abstract: the first 7 lines that contain letters or digits
            print IDX "<tt>\n";
            my $linenr = 0;
            if ( open( ABSTRACT, '<', "$dirpath/$file" ) ) {
                while ( ( $linenr < 7 ) && defined( my $line = <ABSTRACT> ) ) {
                    foreach my $part ( split /\r/, $line ) {    # recognize mac linebreaks
                        next if ( $linenr >= 7 );
                        if ( $file =~ m/(htm|html|xml|xhtml)$/i ) {
                            $part =~ s/<[^>]*>//g;    # ignore sgml meta infos
                        }
                        if ( $part =~ m/[a-zA-Z0-9]/ ) {    # ignore lines without letters and digits
                            print IDX "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
                                . htmlencode( substr $part, 0, 48 )
                                . "<br>\n";
                            $linenr++;
                        }
                    }
                }
                close ABSTRACT;
            }
            else {
                warn "AntiFAQ: cannot read '$dirpath/$file': $!\n";
            }
            print IDX "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;..."
                if ( $linenr >= 7 );
            print IDX "</tt><br>\n";
        }
        else {
            #print IDX "unprocessed...";
        }
        print IDX "  </td>\n";
    }

    # now print the subdirectories:
    foreach my $folder (@subfolders) {
        $rowid++;
        if ( $rowid > 2 ) { $rowid = 1; $colid++; print IDX " </tr><tr>\n"; }
        print IDX "  <td valign=\"top\" width=\"50%\">\n";

        # NOTE(review): the original printed some (lost) markup for
        # folder cells below the first table row; a rule is assumed.
        if ( $colid > 1 ) {
            print IDX "<hr>\n";
        }
        print IDX '+&nbsp;&nbsp;<a href="' . urlencode("$folder/index.html") . '">'
            . htmlencode($folder) . '</a>&nbsp;&nbsp;'
            . displayStats("$dirpath/$folder");
        print IDX "<br>\n";

        # dircontents:
        print IDX " <tt>\n";
        printDir( $dirpath, "$dirpath/$folder", "$dirpath/$folder", '', 1, 1, 1 );
        print IDX " </tt>\n";
        print IDX "  </td>\n";
    }

    # end the table (fill the last table row with empty cells):
    while ( $rowid < 2 ) { $rowid++; print IDX "  <td>&nbsp;</td>\n"; }
    print IDX " </tr>\n</table>\n";
}

sub isInlineFile {
    # determines if the given file shall be inlined
    # param:
    #   $path: path to file
    # returns -1 if only a download link shall be displayed
    #          0 if file shall be displayed in a frame
    #          1 if file shall be inlined
    my ($path) = @_;

    # formats a browser displays itself are shown in a frame:
    if ( $path =~ m/\.(htm|html|pdf|gif|png|jpg|jpeg|bmp|avi|mov|xml|xhtml)$/i ) {
        return 0;
    }

    # binary formats are offered for download only:
    if ( $path =~ m/\.(ps|eps|tar|gz|tgz|zip|sit|mp3|jar)$/i ) {
        return -1;
    }

    # determine the file size; an unreadable file counts as empty,
    # which yields the same answers the undefined size gave before
    my $size = ( stat $path )[7];
    $size = 0 unless defined $size;

    # known text formats are inlined (by file extension) unless huge:
    if (   ( $path =~ m/\.(pas|p|c|cc|h|cpp|c\+\+|java|pl|ml|moc|pro)$/i )
        || ( $path =~ m/\.(txt|tex|vcf|sh|sed|gnuplot|awk|dtd|xsl|csv|css)$/i )
        || ( $path =~ m/(\/makefile)$/i ) )
    {
        return ( ( $size < 64 * 1024 * 1024 ) ? 1 : 0 );
    }

    # fallback algorithm: let perl guess if it is a text file
    if ( ( $size < 8 * 1024 * 1024 ) && ( -T $path ) ) {
        return 1;
    }
    return (-1);
}

# - - - - - - - - - - - - - - - - - - -
#  File Functions
# - - - - - - - - - - - - - - - - - - -

sub getLinkFile {
    # gets the name of an index file to be generated
    # for a given original doc-file
    # params:
    #   $linkType  is "Menu" for the left frames of framesets
    #                 "Main" for the framesets or frameless pages
    #   $directory is the directory where to find a unique filename
    #   $realFile  file to which the generated name belongs to
    #   (%indexForFile: global hash)
    #   (%fileForIndex: global hash)
    my ( $linkType, $directory, $realFile ) = @_;
    my $key = "$linkType:$directory/$realFile";

    # look into cache:
    if ( exists $indexForFile{$key} ) {
        return $indexForFile{$key};
    }

    # the base name does not depend on the counter:
    my $basename = $realFile;
    $basename =~ s/\//_/g;

    # Make MacUsers happy (short filenames):
    if ( length $basename > 24 ) {
        $basename = ( substr $basename, 0, 12 ) . ( substr $basename, -8 );
    }

    # search an unused filename in that directory:
    my $number = 0;
    my $linkFile;
    do {
        # generate a filename proposal
        $linkFile = $basename . ".F_" . hexencode( $number, 4 ) . ".html";

        # prepare next loop:
        $number++;

        # debug
        # print "getLinkFile(...) tests $directory/$linkFile";
    } until (
        ( !( -e "$directory/$linkFile" ) )
        && ( ( !exists( $fileForIndex{"$directory/$linkFile"} ) )
            || ( $fileForIndex{"$directory/$linkFile"} eq $key ) )
    );

    # cache and finish:
    $indexForFile{$key} = $linkFile;
    $fileForIndex{"$directory/$linkFile"} = $key;
    return $linkFile;
}

sub readAndFilterFolder {
    # reads the contents of a folder and
    # returns the contained files and subfolders
    # params:
    #   $directory is the folder to read
    #   (%subfoldercache: global hash)
    #   (%fileentrycache: global hash)
    #   (%fileForIndex: global hash)
    # returns a hash with the keys 'subfolders' and 'fileentries',
    # both values are references to lists of names
    my ($directory) = @_;

    # check cache:
    if ( exists $fileentrycache{$directory} ) {
        return (
            'subfolders'  => $subfoldercache{$directory},
            'fileentries' => $fileentrycache{$directory}
        );
    }

    # now get the directory list (hidden entries are skipped); an
    # unreadable folder is reported and treated as empty:
    my @allentries = ();
    if ( opendir( my $dirhandle, $directory ) ) {
        @allentries = grep { !m/^\./ } readdir $dirhandle;
        closedir $dirhandle;
    }
    else {
        warn "AntiFAQ: cannot read folder '$directory': $!\n";
    }
    my @subfolders  = grep { -d "$directory/$_" } @allentries;
    my @fileentries = grep { -f "$directory/$_" } @allentries;

    # filter all generated files:
    @fileentries = grep { !$fileForIndex{"$directory/$_"} } @fileentries;

    # all subfolders that contain (foreign) index files become fileentries:
    my @resultsubfolders = ();
    foreach my $folder (@subfolders) {
        my $check_index  = "$directory/$folder/index.html";
        my $check_index2 = "$directory/$folder/index.htm";
        if ( ( -e $check_index ) && ( !( $fileForIndex{$check_index} ) ) ) {
            push @fileentries, "$folder/index.html";
        }
        elsif ( ( -e $check_index2 ) && ( !( $fileForIndex{$check_index2} ) ) ) {
            push @fileentries, "$folder/index.htm";
        }
        else {
            push @resultsubfolders, $folder;
        }
    }

    # ignore the AntiFAQ program:
    if ( $directory eq '.' ) {
        @fileentries = grep { !/^(AntiFAQ\.pl|UnAntiFAQ\.sh)$/ } @fileentries;
    }

    # cache and ready:
    $subfoldercache{$directory} = \@resultsubfolders;
    $fileentrycache{$directory} = \@fileentries;
    return (
        'subfolders'  => \@resultsubfolders,
        'fileentries' => \@fileentries
    );
}

sub relPath {
    # calculates a relative path from a source directory to
    # a destination file
    # params:
    #   $from is the folder containing the link-source html file
    #   $to   is the link-destination; $to is relative if and only if $from is relative
    my ( $from, $to ) = @_;

    # debug:
    # print IDX '<br>FROM: '.$from.' ';
    # print IDX '<br>TO: '.$to.' ';

    # normalize both paths: no double slashes, no "." components
    foreach my $p ( $from, $to ) {
        $p =~ s/\/+/\//g;
        $p =~ s/^\.\///;
        $p =~ s/^\.$//;
        $p =~ s/\/\.$//;
        $p =~ s/\/(\.\/)+/\//g;
    }

    my @fromparts = split '/', $from;
    my @toparts   = split '/', $to;

    # drop the common leading folders:
    while ( scalar @fromparts
        && scalar @toparts
        && $fromparts[0] eq $toparts[0] )
    {
        shift @fromparts;
        shift @toparts;
    }

    # climb up once for every remaining folder of $from:
    my $result = join '/', @toparts;
    foreach (@fromparts) {
        $result = '../' . $result;
    }

    # debug:
    # print IDX '<br>REL: '.$result.' ';
    return $result;
}

# - - - - - - - - - - - - - - - - - - -
#  Undo Functions
# - - - - - - - - - - - - - - - - - - -

sub createUndo {
    # creates a new, executable undo script
    # params:
    #   ($UndoFilename: global variable)
    if ( open( UNDO, '>', $UndoFilename ) ) {
        print UNDO "#!/bin/sh\necho removing AntiFAQ index files...\n";
        close UNDO;
        chmod 0755, $UndoFilename;
    }
    else {
        warn "AntiFAQ: cannot create '$UndoFilename': $!\n";
    }
}

sub addUndo {
    # appends a remove command for one generated file to the undo script
    # params:
    #   $generated_file pathname to add to the undo file
    #   ($UndoFilename: global variable)
    my $generated_file = shift @_;
    if ( open( UNDO, '>>', $UndoFilename ) ) {
        # a single quote inside a single-quoted sh string is
        # written as '"'"'
        $generated_file =~ s/'/'"'"'/g;
        print UNDO ( "rm -v \'" . $generated_file . "\'\n" );
        close UNDO;
    }
    else {
        warn "AntiFAQ: cannot append to '$UndoFilename': $!\n";
    }
}

sub undo {
    # removes all files listed in an existing undo script; the script
    # is parsed here, not executed.
    # params:
    #   ($UndoFilename: global variable)
    return unless ( -e $UndoFilename );
    unless ( open( UNDO_EXEC, '<', $UndoFilename ) ) {
        warn "AntiFAQ: cannot read '$UndoFilename': $!\n";
        return;
    }
    my @files2remove;
    while ( defined( my $line = <UNDO_EXEC> ) ) {
        if ( $line =~ m/^rm -v '(.*)'$/ ) {
            my $name = $1;
            $name =~ s/'"'"'/'/g;    # revert the quoting of addUndo
            push @files2remove, $name;
        }
    }
    close UNDO_EXEC;

    foreach my $deleteme (@files2remove) {
        if ( -e $deleteme ) {
            print " rm $deleteme \n";
            unlink $deleteme;
        }
        else {
            print "FILE NOT FOUND: $deleteme \n";
        }
    }
}

# - - - - - - - - - - - - - - - - - - -
#  Encoding Functions
# - - - - - - - - - - - - - - - - - - -

sub htmlencode {
    # replaces the characters & < > " by their html entities
    # params:
    #   $plain text to encode
    my ($plain) = @_;
    $plain =~ s/&/&amp;/g;    # must be first, the others insert '&'
    $plain =~ s/</&lt;/g;
    $plain =~ s/>/&gt;/g;
    $plain =~ s/"/&quot;/g;
    return $plain;
}

sub urlencode {
    # %-encodes every character except letters, digits, '/' and '.'
    # params:
    #   $plain path to encode
    # note: only the lowest two hex digits of a character code are
    # written, so characters above 255 are not encoded correctly
    # (see TODO: Unicode).
    my ($plain) = @_;
    $plain =~ s{([^a-zA-Z0-9/.])}{ '%' . hexencode( ord($1), 2 ) }ge;
    return $plain;
}

sub hexencode {
    # params:
    #   $int    number to encode
    #   $digits length of the generated string
    # returns exactly $digits uppercase hex digits (the lowest ones
    # of $int, zero padded)
    my ( $int, $digits ) = @_;
    my $hex = '';
    while ( $digits > 0 ) {
        my $lastdigit = $int % 16;
        $int = int( $int / 16 );
        if ( $lastdigit < 10 ) {
            $hex = chr( 48 + $lastdigit ) . $hex;         # '0'..'9'
        }
        else {
            $hex = chr( 65 - 10 + $lastdigit ) . $hex;    # 'A'..'F'
        }
        $digits--;
    }
    return $hex;
}

# - - - - - - - - - - - - - - - - - - -
#  Statistic Functions
# - - - - - - - - - - - - - - - - - - -

sub displayStats {
    # gets an html string containing the formatted file/folder statistics
    # params:
    #   $path file/folder where statistics shall be calculated.
    #   (%path_bytes, %path_files: global hashes filled by calcStatistics)
    my ($path) = @_;

    # a folder that is listed by its own index file counts as folder:
    $path =~ s/\/index\.html?$//;

    # print the statistics:
    my $all_files = $path_files{$path};
    my $all_bytes = $path_bytes{$path};
    my $unit      = 'B';
    if ( defined($all_bytes) ) {
        if ( $all_bytes >= 2 * 1024 * 1024 ) {
            $all_bytes = int( $all_bytes / ( 1024 * 1024 ) );
            $unit      = 'MB';
        }
        elsif ( $all_bytes >= 2 * 1024 ) {
            $all_bytes = int( $all_bytes / 1024 );
            $unit      = 'kB';
        }
        return "($all_files files/ $all_bytes $unit)";
    }
    elsif ( -e $path ) {
        my $size = ( stat $path )[7];
        $size = 0 unless defined $size;
        if ( $size >= 2 * 1024 * 1024 ) {
            $size = int( $size / ( 1024 * 1024 ) );
            $unit = 'MB';
        }
        elsif ( $size >= 2 * 1024 ) {
            $size = int( $size / 1024 );
            $unit = 'kB';
        }
        return "($size $unit)";
    }
    else {
        return '';
    }
}

sub calcStatistics {
    # calculates the number of files and bytes for all
    # valid paths beginning at the root folder $path
    # params:
    #   $path folder where statistics shall be calculated.
    # return values:
    #   (%path_bytes: global hash, count the bytes for a given path)
    #   (%path_files: global hash, count the files for a given path)
    my ($path) = @_;
    my $total_bytes = 0;
    my $total_files = 0;

    # get files and folders (hidden entries are skipped):
    my @allentries = ();
    if ( opendir( my $dirhandle, $path ) ) {
        @allentries = grep { !m/^\./ } readdir $dirhandle;
        closedir $dirhandle;
    }
    else {
        warn "AntiFAQ: cannot read folder '$path': $!\n";
    }
    my @subfolders  = grep { -d } map { "$path/$_" } @allentries;
    my @fileentries = grep { -f } map { "$path/$_" } @allentries;

    # calc stats of subfolders
    foreach my $folder (@subfolders) {
        calcStatistics($folder);
        $total_bytes += $path_bytes{$folder};
        $total_files += $path_files{$folder};
        #$total_files += 1; # folders are no files - are they?
    }

    # calc bytes of files
    foreach my $file (@fileentries) {
        my $size = ( stat $file )[7];
        $total_bytes += $size if defined $size;
        $total_files += 1;
    }

    # sum stats:
    $path_bytes{$path} = $total_bytes;
    $path_files{$path} = $total_files;
    print " calcStatistics('$path')=($total_files,$total_bytes);\n";
}

# - - - - - - - - - - - - - - - - - - -
#  The End
# - - - - - - - - - - - - - - - - - - -
1;