JFIFxxC      C  " }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3RbrJFIFxxC      C  " }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbrў fc@sddZddlZddlZdgZGdddZGdddZGdddZdS( u< robotparser.py Copyright (C) 2000 Bastian Kleineidam You can choose between two licenses when using this package: 1) GNU GPLv2 2) PSF license for Python 2.2 The robots.txt Exclusion Protocol is implemented as specified in http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html iNuRobotFileParsercBs|EeZdZdZdddZddZddZd d Zd d Zd dZ ddZ ddZ ddZ dS(uRobotFileParserus This class provides a set of methods to read, parse and answer questions about a single robots.txt file. ucCs>g|_d|_d|_d|_|j|d|_dS(NiF(uentriesuNoneu default_entryuFalseu disallow_allu allow_alluset_urlu last_checked(uselfuurl((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__s      uRobotFileParser.__init__cCs|jS(uReturns the time the robots.txt file was last fetched. This is useful for long-running web spiders that need to check for new robots.txt files periodically. (u last_checked(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyumtimesuRobotFileParser.mtimecCsddl}|j|_dS(uYSets the time the robots.txt file was last fetched to the current time. iN(utimeu last_checked(uselfutime((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyumodified(s uRobotFileParser.modifiedcCs5||_tjj|dd\|_|_dS(u,Sets the URL referring to a robots.txt file.iiN(uurluurllibuparseuurlparseuhostupath(uselfuurl((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuset_url0s uRobotFileParser.set_urlcCsytjj|j}Wnatjjk r|}z;|jdkrOd|_n|jdkrjd|_ nWYdd}~Xn)X|j }|j |j dj dS(u4Reads the robots.txt URL and feeds it to the parser.iiiNuutf-8(iiT(uurlliburequestuurlopenuurluerroru HTTPErrorucodeuTrueu disallow_allu allow_allureaduparseudecodeu splitlines(uselfufuerruraw((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuread5s  uRobotFileParser.readcCsAd|jkr-|jdkr=||_q=n|jj|dS(Nu*(u useragentsu default_entryuNoneuentriesuappend(uselfuentry((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu _add_entryBsuRobotFileParser._add_entrycCsd}t}x|D]}|sn|dkr@t}d}qn|dkrn|j|t}d}qnn|jd}|dkr|d|}n|j}|sqn|jdd}t|dkr|djj|ds u+RobotFileParser.__str__..(ujoinuentries(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__suRobotFileParser.__str__N( u__name__u __module__u __qualname__u__doc__u__init__umtimeumodifieduset_urlureadu _add_entryuparseu can_fetchu__str__(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuRobotFileParsers   3 cBs>|EeZdZdZddZddZddZdS( uRuleLineuoA rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.cCs\|dkr| rd}ntjjtjj|}tjj||_||_dS(NuT(uTrueuurllibuparseu urlunparseuurlparseuquoteupathu allowance(uselfupathu allowance((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__s  uRuleLine.__init__cCs|jdkp|j|jS(Nu*(upathu startswith(uselfufilename((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu applies_tosuRuleLine.applies_tocCs|jrdpdd|jS(NuAllowuDisallowu: (u allowanceupath(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__suRuleLine.__str__N(u__name__u __module__u __qualname__u__doc__u__init__u applies_tou__str__(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuRuleLines  uRuleLinecBsJ|EeZdZdZddZddZddZdd Zd S( uEntryu?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS(N(u useragentsu rulelines(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__s uEntry.__init__cCsjg}x'|jD]}|jd|dgqWx*|jD]}|jt|dgq:Wdj|S(Nu User-agent: u u(u useragentsuextendu rulelinesustrujoin(uselfuretuagentuline((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__s u Entry.__str__cCs]|jddj}x=|jD]2}|dkr9dS|j}||kr#dSq#WdS(u2check if this entry applies to the specified agentu/iu*TF(usplituloweru useragentsuTrueuFalse(uselfu useragentuagent((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu applies_tos   uEntry.applies_tocCs.x'|jD]}|j|r |jSq WdS(uZPreconditions: - our agent applies to this entry - filename is URL decodedT(u rulelinesu applies_tou allowanceuTrue(uselfufilenameuline((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu allowances uEntry.allowanceN(u__name__u __module__u __qualname__u__doc__u__init__u__str__u applies_tou allowance(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuEntrys    uEntry(u__doc__u urllib.parseuurllibuurllib.requestu__all__uRobotFileParseruRuleLineuEntry(((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu s