当前位置: 首页 > Web前端 > CSS

Go的日常库goquery

时间:2023-03-31 13:00:50 CSS

简介

Goquery是一个用Go语言编写的类似于jQuery的库。它基于HTML解析库net/html和CSS库cascadia，提供类似于jQuery的接口。Go著名的爬虫框架colly就是基于goquery的。

快速使用

本文代码使用Go Modules。

创建一个目录并初始化：

$ mkdir goquery && cd goquery
$ go mod init github.com/darjun/go-daily-lib/goquery

安装goquery库：

$ go get -u github.com/PuerkitoBio/goquery

让我们写一个抓取百度热搜的小程序：

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/PuerkitoBio/goquery"
)

func BaiduHotSearch() {
	res, err := http.Get("http://www.baidu.com")
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	if res.StatusCode != 200 {
		log.Fatalf("状态码错误: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		log.Fatal(err)
	}

	doc.Find(".s-hotsearch-content .hotsearch-item").Each(func(i int, s *goquery.Selection) {
		content := s.Find(".title-content-title").Text()
		fmt.Printf("%d: %s\n", i, content)
	})
}

func main() {
	BaiduHotSearch()
}

运行：

$ go run main.go
0: 熊孩子给妈妈转了?万元，赚了几十个
2: 科学家发现π星球
3: 医保局回应到全额报销新冠疫苗医保
4: 阿塞拜疆第二大城市被炮轰
5: 15年后李宇春和周笔畅同框现身

goquery的使用很简单：创建一个io.Reader，数据源可以多种多样，可以从文件中读取，可以通过HTTP请求获取，也可以是已有的字符串；调用NewDocumentFromReader传入上面的io.Reader构造一个*goquery.Document对象；然后就可以调用Document相关的方法来查询我们感兴趣的内容。

在获取我们感兴趣的内容之前，我们必须知道它们在HTML文档中的什么位置。以上面的例子为例，我对百度的热搜榜很感兴趣。首先打开百度：

然后，打开浏览器的开发者工具。我用的是Chrome浏览器，按F12：

在文档中找到你要获取的内容所在的位置。如果需要，可以使用开发者工具左上角的定位按钮进行定位，点击该按钮，然后点击我们要定位的内容，它会自动定位到相应的html源码位置。很方便！

然后调用相关的查找方法，传入CSS选择器。选择器可以有多种形式。上面，我使用.s-hotsearch-content .hotsearch-item来定位热门列表中的每个项目。这里的语法与jQuery的相同。.s-hotsearch-content .hotsearch-item表示查找class=s-hotsearch-content的节点下所有class=hotsearch-item的节点。当然这里我们使用.s-hotsearch-content li也是一样的。看你喜欢哪种写法。

Find方法返回一个*goquery.Selection对象。接下来，我们使用Selection.Each遍历每一个热点列表条目，输出热点列表的内容，即class=title-content-title的span元素的内容。

基本概念

goquery暴露了两个结构Document和Selection，以及一个接口Matcher。

与jQuery不同，net/html包解析HTML并返回节点而不是完整的DOM树。所以jQuery中的各种状态操作函
数在goquery中都没有相应的方法，比如height/css/detach等。

另外，jQuery还有一个显著的特点，就是根据传入参数的个数和类型实现不同的功能。goquery使用静态类型编译语言Go。如果也采用这种方式，静态语言的优势就发挥不出来了。为此，goquery对方法命名做了一些约定：

jQuery中可以不带参数调用的函数在goquery中同名，比如Prev()。接受字符串选择器参数的版本在goquery中被命名为XxxFiltered()，例如PrevFiltered()；
只接受一个参数的jQuery函数在goquery中具有相同的名称，例如Is()；
jQuery中接受一个jQuery对象作为参数的函数，在goquery中名为XxxSelection()，接受一个*Selection类型的参数，比如FilterSelection()；
jQuery中接受一个DOM元素作为参数的函数，在goquery中名为XxxNodes()，接受一个*html.Node类型的变长参数，如FilterNodes()；
jQuery中接受一个函数作为参数的函数，在goquery中命名为XxxFunction()，接受一个函数作为参数，如FilterFunction()；
goquery中可以用选择器调用的函数还有一个版本，接受一个Matcher类型参数，命名为XxxMatcher()，如IsMatcher()。

了解jQuery的童鞋，熟悉以上约定后，使用goquery基本没有问题。

编码

由于net/html需要UTF-8编码，所以goquery也是如此。我们需要确保传递给goquery的HTML源字符串是UTF-8编码的。

现在很少有非UTF-8编码的网页。早期国内很多网站使用的是GB2312或者GBK编码。如果遇到非UTF-8编码的网页怎么办？您可以使用iconv-go将字符串的编码转换为UTF-8。

在知乎的这个回答https://www.zhihu.com/question/20091439中，找到一个2000年的新浪网页：72小时网络生存测试，使用GB2312编码：

抓取这个榜单吧。首先安装iconv-go：

$ go get -u github.com/djimenez/iconv-go

encoding：

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/PuerkitoBio/goquery"
	"github.com/djimenez/iconv-go"
)

func SinaNewSurvival() {
	res, err := http.Get("http://news.sina.com.cn/society/netsurvival")
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	if res.StatusCode != 200 {
		log.Fatalf("状态码错误: %d %s", res.StatusCode, res.Status)
	}

	utf8Body, err := iconv.NewReader(res.Body, "gb2312", "utf-8")
	if err != nil {
		log.Fatal(err)
	}

	doc, err := goquery.NewDocumentFromReader(utf8Body)
	if err != nil {
		log.Fatal(err)
	}

	doc.Find(".title14 li").Each(func(i int, s *goquery.Selection) {
		content := s.Find("a").Text()
		time := s.Find("font").Text()
		fmt.Printf("%d: %s%s\n", i, content, time)
	})
}

func main() {
	SinaNewSurvival()
}

基本结构同第一个例子，注意使用iconv：

utf8Body, err := iconv.NewReader(res.Body, "gb2312", "utf-8")

如果不做这层转换，最终输出会出现乱码。

运行：

$ go run main.go
0: 51岁母子寻亲，报刊网络大显身手（附图）(2000/08/07 16
:39)
1: 读者反映看不懂网络新词(2000/05/17 03:15)
2: 广东南海信息网络教育进课堂(2000/05/07 16:48)
3: 经济评论：中国互联网热该凉了(2000/04/24 07:36)
4: 隐形“网络警察”在行动(2000/04/22 18:31)
5: 智慧社区初具规模，北京互联网市场开市(2000/04/21 17:10)
6: 戴尔在清华谈互联网时代和新浪网(2000/04/05 12:42)
7: 著名电视节目主持人杨澜积极涉足互联网行业(2000/03/24 11:55)
8: 杭州建立民意调查网(2000/03/23 17:10)
9: 《求是》杂志推出网络版(2000/03/23 07:20)
10: 国防交通网络模型构建成功(2000/03/23 05:47)
11: 评论：网上交易，我什么时候可以说我爱你(1999/09/19 19:56)
12: 今天网络生存测试的获胜者被选出(1999/09/16 16:25)

如果我们想更通用，不想在代码中硬编码网页的编码，我们可以使用x/text/encoding和x/net/html/charset这两个包来猜测网页的编码。

安装x/text包，由于x/net/html/charset是x/net/html的子包，无需再次安装：

$ go get -u golang.org/x/text

工具函数：

func detectContentCharset(body io.Reader) string {
	r := bufio.NewReader(body)
	if data, err := r.Peek(1024); err == nil {
		if _, name, _ := charset.DetermineEncoding(data, ""); len(name) != 0 {
			return name
		}
	}

	return "utf-8"
}

func DecodeHTMLBody(body io.Reader, charset string) (io.Reader, error) {
	if charset == "" {
		charset = detectContentCharset(body)
	}

	e, err := htmlindex.Get(charset)
	if err != nil {
		return nil, err
	}

	if name, _ := htmlindex.Name(e); name != "utf-8" {
		body = e.NewDecoder().Reader(body)
	}

	return body, nil
}

charset.DetermineEncoding会根据HTML页面中的元信息猜测网页的编码。

接下来，将使用iconv-go的那行代码改成如下：

utf8Body, err := DecodeHTMLBody(res.Body, "")

总结

goquery功能强大，使用方便，是爬虫库colly的基石。它可以用来做一些简单的抓取和HTML处理。因为太底层，推荐使用colly来爬取大量复杂的网页。

如果你发现了好玩又好用的Go语言库，欢迎在Go每日一库 GitHub上提交issue😄

参考

goquery GitHub：https://github.com/PuerkitoBio/goquery
Go每日一库 GitHub：https://github.com/darjun/go-daily-lib

我的博客：https://darjun.github.io

欢迎关注我的微信公众号【GoUpUp】，一起学习，一起进步