\h@ "dZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z ddlmZddlmZmZmZmZmZmZmZmZmZmZmZddlmZddlmZddl m!Z!m"Z"dd l#m$Z$dd l%m&Z&dd l'm(Z(dd l)m*Z*dd l+m,Z,ddl-m.Z.ddl/m0Z0ddl1m2Z2ddl3m4Z4m5Z5m6Z6erddlm7Z7ne8Z7ejre:Z;eeGdde?Z@deddfdZAGdde?ZBde$.FM c,eZdZdededdffd ZxZS)_NotAPIContent content_type request_descr#NcBt|||||_||_yN)super__init__r/r0)selfr/r0 __class__s r*r4z_NotAPIContent.__init__Cs" |4((r,)__name__ __module__ __qualname__strr4 __classcell__r6s@r*r.r.Bs")S))))r,r.responsec|jjdd}|j}|jdryt ||j j )z Check the Content-Type header to ensure the response contains a Simple API Response. Raises `_NotAPIContent` if the content type is not a valid content-type. Content-TypeUnknown)z text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr&r'r.requestmethod)r=r/content_type_ls r*_ensure_api_headerrGIs[##'' BL!'')N     x'7'7'>'> ??r,c eZdZy)_NotHTTPN)r7r8r9r,r*rIrI_sr,rIsessionctjj|\}}}}}|dvr t|j |d}t |t |y)z Send a HEAD request to the URL, and ensure the response contains a simple API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or `_NotAPIContent` if the content type is not a valid content type. >httphttpsT)allow_redirectsN)urllibparseurlsplitrIheadrrG)r"rKr)netlocpathqueryfragmentresps r*_ensure_api_responserYcsV-3LL,A,A#,F)FFD% &&j <<T< 2DTtr,ctt|jr t||tj dt ||j|djgddd}t|t|tj dt ||jjd d |S) aYAccess an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to check the Content-Type is HTML or Simple API, to avoid downloading a large file. Raise `_NotHTTP` if the content type cannot be determined, or `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. 3. Check the Content-Type header to make sure we got a Simple API response, and raise `_NotAPIContent` otherwise. rKzGetting page %sz, )rAz*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z max-age=0)Acceptz Cache-Control)rBzFetched page %s as %sr?r@) rrfilenamerYloggerdebugrrCjoinrrGrB)r"rKrXs r*_get_simple_responseraustCy))*S'2 LL"$8$=> ;; ii()+   D4Tt LLS! 3 Kr,rBc|rHd|vrDtjj}|d|d<|jd}|r t |Sy)z=Determine if we have any encoding information in our headers.r?z content-typecharsetN)emailmessageMessage get_paramr:)rBmrcs r*_get_encoding_from_headersrisK>W, MM ! ! ##N3.++i( w<  r,c0eZdZddZdedefdZdefdZy)CacheablePageContentr#Nc.|jsJ||_yr2)cache_link_parsingpager5rns r*r4zCacheablePageContent.__init__s&&&& r,otherct|t|xr-|jj|jjk(Sr2) isinstancetypernr")r5rps r*__eq__zCacheablePageContent.__eq__s-%d,P%**..1PPr,c@t|jjSr2)hashrnr"r5s r*__hash__zCacheablePageContent.__hash__sDIIMM""r,)rn IndexContentr#N) r7r8r9r4objectboolrtintrxrJr,r*rkrks)QFQtQ###r,rkc"eZdZdddeefdZy) ParseLinksrnryr#cyr2rJros r*__call__zParseLinks.__call__s r,N)r7r8r9rrrrJr,r*r~r~s ^  r,r~fnctjddtdttffd tj dddttffd }|S) z Given a function that parses an Iterable[Link] from an IndexContent, cache the function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. N)maxsizecacheable_pager#c:t|jSr2)listrn)rrs r*wrapperz*with_cached_index_content..wrappersB~**+,,r,rnryc`|jrt|St|Sr2)rmrkr)rnrrs r*wrapper_wrapperz2with_cached_index_content..wrapper_wrappers+  " "/56 6BtH~r,) functools lru_cacherkr rwraps)rrrs` @r*with_cached_index_contentrsl&- 4-d-'-__Rnd r,rnryc#TK|jj}|jdr^tj|j }|j dgD])}tj||j}|&|+yt|j}|jxsd}|j|j j||j}|jxs|}|jD]!} tj | ||}||#yw)z\ Parse a Simple API's Index Content, and yield its anchor elements as Link objects. rAfilesNzutf-8)page_urlbase_url)r/r&r'jsonloadscontentrCr from_jsonr"HTMLLinkParserencodingfeeddecoderanchors from_element) rnrFdatafilelinkparserrr"ranchors r* parse_linksrs &&,,.N  !FGzz$,,'HHWb) D>>$1D|J    DHH %F}}'H KK ##H-. ((C%#H..  #I <  sD&D(c FeZdZdZ d dededeedededdf d Zdefd Z y) ryz5Represents one response (or page), along with its URLrr/rr"rmr#NcJ||_||_||_||_||_y)am :param encoding: the encoding to decode the given content. :param url: the URL from which the HTML was downloaded. :param cache_link_parsing: whether links parsed from this page's url should be cached. PyPI index urls should have this set to False, for example. N)rr/rr"rm)r5rr/rr"rms r*r4zIndexContent.__init__s) (  "4r,c,t|jSr2)rr"rws r*__str__zIndexContent.__str__s#DHH--r,T) r7r8r9__doc__bytesr:r r{r4rrJr,r*ryrysY?$( 5553- 5  5 ! 5 5*..r,ceZdZdZdeddffd ZdedeeeeefddfdZ deeeeefdeefd Z xZ S) rzf HTMLParser that keeps the first base HREF and a list of all anchor elements' attributes. r"r#NcPt|d||_d|_g|_y)NT)convert_charrefs)r3r4r"rr)r5r"r6s r*r4zHTMLLinkParser.__init__#s( $/'+ 79 r,tagattrsc|dk(r(|j|j|}|||_yy|dk(r%|jjt |yy)Nbasea)rget_hrefrappenddict)r5rrhrefs r*handle_starttagzHTMLLinkParser.handle_starttag*sT &=T]]2=='D $  CZ LL  U ,r,c*|D]\}}|dk(s |cSy)NrrJ)r5rnamevalues r*rzHTMLLinkParser.get_href2s&  KD%v~  r,) r7r8r9rr:r4r rr rrr;r<s@r*rrss :C:D:-3-tE#x}:L4M/N-SW-d5hsm);#<=(3-r,rrreasonmeth).Nc<|tj}|d||y)Nz%Could not fetch URL %s: %s - skipping)r^r_)rrrs r*_handle_get_simple_failr9s  ||| 0$?r,rmct|j}t|j|jd||j|S)Nr?)rr"rm)rirBryrr")r=rmrs r*_make_index_contentrCsE*(*:*:;H ( LL-  r,c|jjddd}t|}|rtj d||yt j j|\}}}}}}|dk(rtjjt jj|rL|jds|dz }t j j|d}tjd| t!|| }t#||j$ S#t&$rtj d |Yyt($r6}tj d ||j*|j,Yd}~yd}~wt.$r}t1||Yd}~yd}~wt2$r}t1||Yd}~yd}~wt4$r6}d } | t7|z } t1|| tj8Yd}~yd}~wt:j<$r}t1|d|Yd}~yd}~wt:j>$rt1|dYywxYw)N#rrzICannot look at %s URL %s because it does not support lookup as web pages.r/z index.htmlz# file: URL is directory, getting %sr[)rmz`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )rzconnection error: z timed out) r"splitr+r^warningrPrQurlparseosrUisdirrD url2pathnameendswithurljoinr_rarrmrIr.r0r/rrrrr:inforConnectionErrorTimeout) rrKr" vcs_schemer)_rUrXexcrs r*_get_index_contentrPs ((..a  #C#3'J W    &||44S9FAtQ1 BGGMM&..*E*Ed*KL||C 3JC ll""3 5 :C@U#C9:#4DH=,E77 H= F H= F11 H==,G..H=H"H=<H=c:eZdZUeeeed<eeeed<y)CollectedSources find_links index_urlsN)r7r8r9r r r__annotations__rJr,r*rrs"*-..*-..r,rc eZdZdZdededdfdZe ddedede ddfd Z e de e fd Zd edeefd Zd e dedefdZy) LinkCollectorz Responsible for collecting Link objects from all configured locations, making network requests as needed. The class's main method is its collect_sources() method. rK search_scoper#Nc ||_||_yr2)rrK)r5rKrs r*r4zLinkCollector.__init__s ) r,optionssuppress_no_indexc0|jg|jz}|jr0|s.tj ddj d|Dg}|j xsg}tj|||j}t||}|S)z :param session: The Session to use to make requests. :param suppress_no_index: Whether to ignore the --no-index option when constructing the SearchScope object. zIgnoring indexes: %s,c32K|]}t|ywr2r).0r"s r* z'LinkCollector.create..sIs-c2Is)rrno_index)rKr) index_urlextra_index_urlsrr^r_r`rrcreater)clsrKrrrrrlink_collectors r*rzLinkCollector.creates''(7+C+CC   $5 LL&IjII J''-2 "))!!%% '% r,c.|jjSr2)rrrws r*rzLinkCollector.find_linkss  +++r,locationc0t||jS)z> Fetch an HTML page containing package links. r[)rrK)r5rs r*fetch_responsezLinkCollector.fetch_responses"(DLLAAr, project_namecandidates_from_pagectjfdjjDj }tjfdj Dj }t jtjrwtj||Dcgc]}||jd|j!}}t|ddg|z}t jdj|t!t#|t#|Scc}w)Nc 3hK|])}t|jjdd+yw)Frpage_validator expand_dirrmrNr rKis_secure_originrlocrrr5s r*rz0LinkCollector.collect_sources..sA 4  %9#||<< #()    4 /2c 3hK|])}t|jjdd+yw)TrNrrs r*rz0LinkCollector.collect_sources..sA 5  %9#||<<#')    5 rz* z' location(s) to search for versions of : )rr) collections OrderedDictrget_index_urls_locationsvaluesrr^ isEnabledForloggingDEBUG itertoolschainrr(r_r`rr)r5rrindex_url_sourcesfind_links_sourcessliness``` r*collect_sourceszLinkCollector.collect_sourcess2 (33 4 ((AA,O 4   &( )44 5  5   &(    w}} -#);=NO=QVV%7QVVH E u:,##/.3E LL5) *./-.  s>$D<)F)r7r8r9rrrr4 classmethodrr{rpropertyr r:rrr ryrrrrrJr,r*rrs"   #(    B,DI,,BtB0FB , , 1,   , r,rr2r)Rrr email.messagerdrrrrr urllib.parserPurllib.request html.parserroptparsertypingrrrrr r r r r rr pip._vendorrpip._vendor.requestsrpip._vendor.requests.exceptionsrrpip._internal.exceptionsrpip._internal.models.linkr!pip._internal.models.search_scoperpip._internal.network.sessionrpip._internal.network.utilsrpip._internal.utils.filetypesrpip._internal.utils.miscrpip._internal.vcsrsourcesrrr r!rz getLoggerr7r^r:ResponseHeadersr+ Exceptionr.rGrIrYrarirkr~rrryrrr{rrrrrJr,r*rs+  "    !)@;*94899!AAH   8 $ c*38C=)Y)@@d@, y cJ4$<c<J<8<~HSM # #  *(n$8..8Z>+/@ @ #y. !@ 8I& '@ @48  ,0  :T:z:h~>V:z/z/ i i r,