package CONF;
#
# Configuration file for RePEc log summarizing script rstat.pl
#
# Optional sub LOG_PREPROC at the end provides the possibility of
# preprocessing log lines before rstat.pl looks at them.

# Service we are collecting stats for, any unique description will do
$SERVICE = 'EconomistsOnline';

# default to doing statistics for the current month, -c option
#$CURRENT = 1;

# compress (gz) the output file, -z option
#$COMPRESS = 1;

# FTP the output file to the specified location, -ftp option
# this will cause the output to be gzipped.
#$FTP = 'username:password@server/dir';
#$FTP = 'logec:xxx@logec.repec.org/upload/logec'; # Must replace xxx with password

# IP numbers to exclude from statistics
$STAT_IP_EXCLUDE = '';

# beginning of the path to abstract HTML files as shown in the server log
# see sub Path_2_handle
#$SITE_PATH = '/ideas/data/Papers/';            # should suit IDEAS
#$SITE_PATH = '/(?:WoPEc|BibEc)/data/Papers/';  # should suit WoPEc and BibEc
$SITE_PATH = '';
#$SITE_PATH = '/[a-z]{6}/(?:abs|papers)/';      # for hhs

# path to the redirection script
# see sub Redir_2_handle
#$SITE_REDIR_SCRIPT = '/adnetec-cgi-bin/get_doc.pl'; # should suit WoPEc
$SITE_REDIR_SCRIPT = 'DOWNLOAD_';
#$SITE_REDIR_SCRIPT = '/scripts/redir.pl';           # for hhs

# default log format
# W3C  = W3C extended log file format as per http://www.w3.org/TR/WD-logfile.html
# NCSA = NCSA "common" log file format
# MS   = Microsoft IIS log file format
$DEFLOG = 'NCSA';

# ignore stuff at the start of the line with the NCSA log format
#$SHIFT = 1; # skips the first item in the log line; useful when the server name appears first.

# An IP is declared a robot IP if the number of hits from that single IP to
# URLs we care about exceeds this, expressed as a fraction of the URLs in the
# logs we care about.
# IPs are also declared robot IPs if they hit /robots.txt
$ROBOT_FRACTION = 0.5;

$AUTHORITY = 'RePEc';

#----------------------------
sub Redir_2_handle {} # left empty for this service

#----------------------------
sub Path_2_handle {
#
# Figure out the handle of a paper from the URL for abstract access
# (really URLs that match $SITE_PATH).
#
# Arguments are uri-stem and uri-query (normally empty, -)
#
# Returns the handle (lower cased) and 'a' if this is for the abstract
# or 'd' if this is for a download.
# Returns two empty strings if we should not collect statistics for this access.
#
my( $stem, $query ) = @_;

if ( $stem =~ m/ABS_VIEW_(RePEc:[a-z]{3}:[a-z0-9]{6}:.+?)$/i ) {
    warn("query: $1 ok.");
    return ( lc($1), 'a' );
}
elsif ( $stem =~ m/DOWNLOAD_(RePEc:[a-z]{3}:[a-z0-9]{6}:.+?)$/i ) {
    warn("Download query: $1 ok.");
    return ( lc($1), 'd' );
}
else {
    warn("query: '$stem' not ok.");
    return ( '', '' );
}
}

#----------------------------
#sub LOG_PREPROC {
#
# If defined, this subroutine is called with the log line as its
# argument prior to any other processing of the line
#
#my( $line ) = @_;
#
#$line =~ s/foo/bar/;
#
#return $line;
#
#}

#----------------------------
#sub IS_ROBOT {
#
# If defined, this subroutine overrides the default check for robots,
# which just checks for accesses to /robots.txt as below
#
#my( $stem, $query ) = @_;
#
#return $stem =~ "/robots.txt";
#
#}

1; # return true
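
#----------------------------
# A minimal, commented-out sanity check for Path_2_handle above. The request
# stems and the handle 'RePEc:abc:def123:p2001' are invented purely for
# illustration; uncomment the lines below and run this file through perl to
# see the values noted in the comments (the warn output appears on STDERR).
#
#my( $h, $t ) = Path_2_handle( '/cgi-bin/DOWNLOAD_RePEc:abc:def123:p2001', '-' );
#print "$h $t\n";      # repec:abc:def123:p2001 d
#
#( $h, $t ) = Path_2_handle( '/cgi-bin/ABS_VIEW_RePEc:abc:def123:p2001', '-' );
#print "$h $t\n";      # repec:abc:def123:p2001 a
#
#( $h, $t ) = Path_2_handle( '/images/logo.png', '-' );
#print "'$h' '$t'\n";  # '' '' -- not matched, so not counted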