PNG  IHDRQgAMA a cHRMz&u0`:pQ<bKGDgmIDATxwUﹻ& ^CX(J I@ "% (** BX +*i"]j(IH{~R)[~>h{}gy)I$Ij .I$I$ʊy@}x.: $I$Ii}VZPC)I$IF ^0ʐJ$I$Q^}{"r=OzI$gRZeC.IOvH eKX $IMpxsk.쒷/&r[޳<v| .I~)@$updYRa$I |M.e JaֶpSYR6j>h%IRز if&uJ)M$I vLi=H;7UJ,],X$I1AҒJ$ XY XzI@GNҥRT)E@;]K*Mw;#5_wOn~\ DC&$(A5 RRFkvIR}l!RytRl;~^ǷJj اy뷦BZJr&ӥ8Pjw~vnv X^(I;4R=P[3]J,]ȏ~:3?[ a&e)`e*P[4]T=Cq6R[ ~ޤrXR Հg(t_HZ-Hg M$ãmL5R uk*`%C-E6/%[t X.{8P9Z.vkXŐKjgKZHg(aK9ڦmKjѺm_ \#$5,)-  61eJ,5m| r'= &ڡd%-]J on Xm|{ RҞe $eڧY XYrԮ-a7RK6h>n$5AVڴi*ֆK)mѦtmr1p| q:흺,)Oi*ֺK)ܬ֦K-5r3>0ԔHjJئEZj,%re~/z%jVMڸmrt)3]J,T K֦OvԒgii*bKiNO~%PW0=dii2tJ9Jݕ{7"I P9JKTbu,%r"6RKU}Ij2HKZXJ,妝 XYrP ެ24c%i^IK|.H,%rb:XRl1X4Pe/`x&P8Pj28Mzsx2r\zRPz4J}yP[g=L) .Q[6RjWgp FIH*-`IMRaK9TXcq*I y[jE>cw%gLRԕiFCj-ďa`#e~I j,%r,)?[gp FI˨mnWX#>mʔ XA DZf9,nKҲzIZXJ,L#kiPz4JZF,I,`61%2s $,VOϚ2/UFJfy7K> X+6 STXIeJILzMfKm LRaK9%|4p9LwJI!`NsiazĔ)%- XMq>pk$-$Q2x#N ؎-QR}ᶦHZډ)J,l#i@yn3LN`;nڔ XuX5pF)m|^0(>BHF9(cզEerJI rg7 4I@z0\JIi䵙RR0s;$s6eJ,`n 䂦0a)S)A 1eJ,堌#635RIgpNHuTH_SԕqVe ` &S)>p;S$魁eKIuX`I4춒o}`m$1":PI<[v9^\pTJjriRŭ P{#{R2,`)e-`mgj~1ϣLKam7&U\j/3mJ,`F;M'䱀 .KR#)yhTq;pcK9(q!w?uRR,n.yw*UXj#\]ɱ(qv2=RqfB#iJmmL<]Y͙#$5 uTU7ӦXR+q,`I}qL'`6Kͷ6r,]0S$- [RKR3oiRE|nӦXR.(i:LDLTJjY%o:)6rxzҒqTJjh㞦I.$YR.ʼnGZ\ֿf:%55 I˼!6dKxm4E"mG_ s? .e*?LRfK9%q#uh$)i3ULRfK9yxm܌bj84$i1U^@Wbm4uJ,ҪA>_Ij?1v32[gLRD96oTaR׿N7%L2 NT,`)7&ƝL*꽙yp_$M2#AS,`)7$rkTA29_Iye"|/0t)$n XT2`YJ;6Jx".e<`$) PI$5V4]29SRI>~=@j]lp2`K9Jaai^" Ԋ29ORI%:XV5]JmN9]H;1UC39NI%Xe78t)a;Oi Ҙ>Xt"~G>_mn:%|~ޅ_+]$o)@ǀ{hgN;IK6G&rp)T2i୦KJuv*T=TOSV>(~D>dm,I*Ɛ:R#ۙNI%D>G.n$o;+#RR!.eU˽TRI28t)1LWϚ>IJa3oFbu&:tJ*(F7y0ZR ^p'Ii L24x| XRI%ۄ>S1]Jy[zL$adB7.eh4%%누>WETf+3IR:I3Xה)3אOۦSRO'ٺ)S}"qOr[B7ϙ.edG)^ETR"RtRݜh0}LFVӦDB^k_JDj\=LS(Iv─aTeZ%eUAM-0;~˃@i|l @S4y72>sX-vA}ϛBI!ݎߨWl*)3{'Y|iSlEڻ(5KtSI$Uv02,~ԩ~x;P4ցCrO%tyn425:KMlD ^4JRxSهF_}شJTS6uj+ﷸk$eZO%G*^V2u3EMj3k%)okI]dT)URKDS 7~m@TJR~荪fT"֛L \sM -0T KfJz+nإKr L&j()[E&I ߴ>e FW_kJR|!O:5/2跌3T-'|zX ryp0JS ~^F>-2< `*%ZFP)bSn"L :)+pʷf(pO3TMW$~>@~ū:TAIsV1}S2<%ޟM?@iT ,Eūoz%i~g|`wS(]oȤ8)$ ntu`өe`6yPl IzMI{ʣzʨ )IZ2= ld:5+請M$-ї;U>_gsY$ÁN5WzWfIZ)-yuXIfp~S*IZdt;t>KūKR|$#LcԀ+2\;kJ`]YǔM1B)UbG"IRߊ<xܾӔJ0Z='Y嵤 Leveg)$znV-º^3Ւof#0Tfk^Zs[*I꯳3{)ˬW4Ւ4 OdpbZRS|*I 55#"&-IvT&/윚Ye:i$ 9{LkuRe[I~_\ؠ%>GL$iY8 9ܕ"S`kS.IlC;Ҏ4x&>u_0JLr<J2(^$5L s=MgV ~,Iju> 7r2)^=G$1:3G< `J3~&IR% 6Tx/rIj3O< ʔ&#f_yXJiގNSz; Tx(i8%#4 ~AS+IjerIUrIj362v885+IjAhK__5X%nV%Iͳ-y|7XV2v4fzo_68"S/I-qbf; LkF)KSM$ Ms>K WNV}^`-큧32ŒVؙGdu,^^m%6~Nn&͓3ŒVZMsRpfEW%IwdǀLm[7W&bIRL@Q|)* i ImsIMmKmyV`i$G+R 0tV'!V)֏28vU7͒vHꦼtxꗞT ;S}7Mf+fIRHNZUkUx5SAJㄌ9MqμAIRi|j5)o*^'<$TwI1hEU^c_j?Е$%d`z cyf,XO IJnTgA UXRD }{H}^S,P5V2\Xx`pZ|Yk:$e ~ @nWL.j+ϝYb퇪bZ BVu)u/IJ_ 1[p.p60bC >|X91P:N\!5qUB}5a5ja `ubcVxYt1N0Zzl4]7­gKj]?4ϻ *[bg$)+À*x쳀ogO$~,5 زUS9 lq3+5mgw@np1sso Ӻ=|N6 /g(Wv7U;zωM=wk,0uTg_`_P`uz?2yI!b`kĸSo+Qx%!\οe|އԁKS-s6pu_(ֿ$i++T8=eY; צP+phxWQv*|p1. ά. XRkIQYP,drZ | B%wP|S5`~́@i޾ E;Չaw{o'Q?%iL{u D?N1BD!owPHReFZ* k_-~{E9b-~P`fE{AܶBJAFO wx6Rox5 K5=WwehS8 (JClJ~ p+Fi;ŗo+:bD#g(C"wA^ r.F8L;dzdIHUX݆ϞXg )IFqem%I4dj&ppT{'{HOx( Rk6^C٫O.)3:s(۳(Z?~ٻ89zmT"PLtw䥈5&b<8GZ-Y&K?e8,`I6e(֍xb83 `rzXj)F=l($Ij 2*(F?h(/9ik:I`m#p3MgLaKjc/U#n5S# m(^)=y=đx8ŬI[U]~SцA4p$-F i(R,7Cx;X=cI>{Km\ o(Tv2vx2qiiDJN,Ҏ!1f 5quBj1!8 rDFd(!WQl,gSkL1Bxg''՞^ǘ;pQ P(c_ IRujg(Wz bs#P­rz> k c&nB=q+ؔXn#r5)co*Ũ+G?7< |PQӣ'G`uOd>%Mctz# Ԫڞ&7CaQ~N'-P.W`Oedp03C!IZcIAMPUۀ5J<\u~+{9(FbbyAeBhOSܳ1 bÈT#ŠyDžs,`5}DC-`̞%r&ڙa87QWWp6e7 Rϫ/oY ꇅ Nܶըtc!LA T7V4Jsū I-0Pxz7QNF_iZgúWkG83 0eWr9 X]㾮݁#Jˢ C}0=3ݱtBi]_ &{{[/o[~ \q鯜00٩|cD3=4B_b RYb$óBRsf&lLX#M*C_L܄:gx)WΘsGSbuL rF$9';\4Ɍq'n[%p.Q`u hNb`eCQyQ|l_C>Lb꟟3hSb #xNxSs^ 88|Mz)}:](vbۢamŖ࿥ 0)Q7@0=?^k(*J}3ibkFn HjB׻NO z x}7p 0tfDX.lwgȔhԾŲ }6g E |LkLZteu+=q\Iv0쮑)QٵpH8/2?Σo>Jvppho~f>%bMM}\//":PTc(v9v!gոQ )UfVG+! 35{=x\2+ki,y$~A1iC6#)vC5^>+gǵ@1Hy٪7u;p psϰu/S <aʸGu'tD1ԝI<pg|6j'p:tպhX{o(7v],*}6a_ wXRk,O]Lܳ~Vo45rp"N5k;m{rZbΦ${#)`(Ŵg,;j%6j.pyYT?}-kBDc3qA`NWQū20/^AZW%NQ MI.X#P#,^Ebc&?XR tAV|Y.1!؅⨉ccww>ivl(JT~ u`ٵDm q)+Ri x/x8cyFO!/*!/&,7<.N,YDŽ&ܑQF1Bz)FPʛ?5d 6`kQձ λc؎%582Y&nD_$Je4>a?! ͨ|ȎWZSsv8 j(I&yj Jb5m?HWp=g}G3#|I,5v珿] H~R3@B[☉9Ox~oMy=J;xUVoj bUsl_35t-(ՃɼRB7U!qc+x4H_Qo֮$[GO<4`&č\GOc[.[*Af%mG/ ňM/r W/Nw~B1U3J?P&Y )`ѓZ1p]^l“W#)lWZilUQu`-m|xĐ,_ƪ|9i:_{*(3Gѧ}UoD+>m_?VPۅ15&}2|/pIOʵ> GZ9cmíتmnz)yߐbD >e}:) r|@R5qVSA10C%E_'^8cR7O;6[eKePGϦX7jb}OTGO^jn*媓7nGMC t,k31Rb (vyܴʭ!iTh8~ZYZp(qsRL ?b}cŨʊGO^!rPJO15MJ[c&~Z`"ѓޔH1C&^|Ш|rʼ,AwĴ?b5)tLU)F| &g٣O]oqSUjy(x<Ϳ3 .FSkoYg2 \_#wj{u'rQ>o;%n|F*O_L"e9umDds?.fuuQbIWz |4\0 sb;OvxOSs; G%T4gFRurj(֍ڑb uԖKDu1MK{1^ q; C=6\8FR艇!%\YÔU| 88m)֓NcLve C6z;o&X x59:q61Z(T7>C?gcļxѐ Z oo-08jہ x,`' ҔOcRlf~`jj".Nv+sM_]Zk g( UOPyεx%pUh2(@il0ݽQXxppx-NS( WO+轾 nFߢ3M<;z)FBZjciu/QoF 7R¥ ZFLF~#ȣߨ^<쩡ݛкvџ))ME>ώx4m#!-m!L;vv#~Y[đKmx9.[,UFS CVkZ +ߟrY٧IZd/ioi$%͝ب_ֶX3ܫhNU ZZgk=]=bbJS[wjU()*I =ώ:}-蹞lUj:1}MWm=̛ _ ¾,8{__m{_PVK^n3esw5ӫh#$-q=A̟> ,^I}P^J$qY~Q[ Xq9{#&T.^GVj__RKpn,b=`żY@^՝;z{paVKkQXj/)y TIc&F;FBG7wg ZZDG!x r_tƢ!}i/V=M/#nB8 XxЫ ^@CR<{䤭YCN)eKOSƟa $&g[i3.C6xrOc8TI;o hH6P&L{@q6[ Gzp^71j(l`J}]e6X☉#͕ ׈$AB1Vjh㭦IRsqFBjwQ_7Xk>y"N=MB0 ,C #o6MRc0|$)ف"1!ixY<B9mx `,tA>)5ػQ?jQ?cn>YZe Tisvh# GMމȇp:ԴVuږ8ɼH]C.5C!UV;F`mbBk LTMvPʍϤj?ԯ/Qr1NB`9s"s TYsz &9S%U԰> {<ؿSMxB|H\3@!U| k']$U+> |HHMLޢ?V9iD!-@x TIî%6Z*9X@HMW#?nN ,oe6?tQwڱ.]-y':mW0#!J82qFjH -`ѓ&M0u Uγmxϵ^-_\])@0Rt.8/?ٰCY]x}=sD3ojަЫNuS%U}ԤwHH>ڗjܷ_3gN q7[q2la*ArǓԖ+p8/RGM ]jacd(JhWko6ڎbj]i5Bj3+3!\j1UZLsLTv8HHmup<>gKMJj0@H%,W΃7R) ">c, xixј^ aܖ>H[i.UIHc U1=yW\=S*GR~)AF=`&2h`DzT󑓶J+?W+}C%P:|0H܆}-<;OC[~o.$~i}~HQ TvXΈr=b}$vizL4:ȰT|4~*!oXQR6Lk+#t/g lԁߖ[Jڶ_N$k*". xsxX7jRVbAAʯKҎU3)zSNN _'s?f)6X!%ssAkʱ>qƷb hg %n ~p1REGMHH=BJiy[<5 ǁJҖgKR*倳e~HUy)Ag,K)`Vw6bRR:qL#\rclK/$sh*$ 6덤 KԖc 3Z9=Ɣ=o>X Ώ"1 )a`SJJ6k(<c e{%kϊP+SL'TcMJWRm ŏ"w)qc ef꒵i?b7b('"2r%~HUS1\<(`1Wx9=8HY9m:X18bgD1u ~|H;K-Uep,, C1 RV.MR5άh,tWO8WC$ XRVsQS]3GJ|12 [vM :k#~tH30Rf-HYݺ-`I9%lIDTm\ S{]9gOڒMNCV\G*2JRŨ;Rҏ^ڽ̱mq1Eu?To3I)y^#jJw^Ńj^vvlB_⋌P4x>0$c>K†Aļ9s_VjTt0l#m>E-,,x,-W)سo&96RE XR.6bXw+)GAEvL)͞K4$p=Ũi_ѱOjb HY/+@θH9޼]Nԥ%n{ &zjT? Ty) s^ULlb,PiTf^<À] 62R^V7)S!nllS6~͝V}-=%* ʻ>G DnK<y&>LPy7'r=Hj 9V`[c"*^8HpcO8bnU`4JȪAƋ#1_\ XϘHPRgik(~G~0DAA_2p|J묭a2\NCr]M_0 ^T%e#vD^%xy-n}-E\3aS%yN!r_{ )sAw ڼp1pEAk~v<:`'ӭ^5 ArXOI驻T (dk)_\ PuA*BY]yB"l\ey hH*tbK)3 IKZ򹞋XjN n *n>k]X_d!ryBH ]*R 0(#'7 %es9??ښFC,ՁQPjARJ\Ρw K#jahgw;2$l*) %Xq5!U᢯6Re] |0[__64ch&_}iL8KEgҎ7 M/\`|.p,~`a=BR?xܐrQ8K XR2M8f ?`sgWS%" Ԉ 7R%$ N}?QL1|-эټwIZ%pvL3Hk>,ImgW7{E xPHx73RA @RS CC !\ȟ5IXR^ZxHл$Q[ŝ40 (>+ _C >BRt<,TrT {O/H+˟Pl6 I B)/VC<6a2~(XwV4gnXR ϱ5ǀHٻ?tw똤Eyxp{#WK qG%5],(0ӈH HZ])ג=K1j&G(FbM@)%I` XRg ʔ KZG(vP,<`[ Kn^ SJRsAʠ5xՅF`0&RbV tx:EaUE/{fi2;.IAwW8/tTxAGOoN?G}l L(n`Zv?pB8K_gI+ܗ #i?ޙ.) p$utc ~DžfՈEo3l/)I-U?aԅ^jxArA ΧX}DmZ@QLےbTXGd.^|xKHR{|ΕW_h] IJ`[G9{).y) 0X YA1]qp?p_k+J*Y@HI>^?gt.06Rn ,` ?);p pSF9ZXLBJPWjgQ|&)7! HjQt<| ؅W5 x W HIzYoVMGP Hjn`+\(dNW)F+IrS[|/a`K|ͻ0Hj{R,Q=\ (F}\WR)AgSG`IsnAR=|8$}G(vC$)s FBJ?]_u XRvύ6z ŨG[36-T9HzpW̞ú Xg큽=7CufzI$)ki^qk-) 0H*N` QZkk]/tnnsI^Gu't=7$ Z;{8^jB% IItRQS7[ϭ3 $_OQJ`7!]W"W,)Iy W AJA;KWG`IY{8k$I$^%9.^(`N|LJ%@$I}ֽp=FB*xN=gI?Q{٥4B)mw $Igc~dZ@G9K X?7)aK%݅K$IZ-`IpC U6$I\0>!9k} Xa IIS0H$I H ?1R.Чj:4~Rw@p$IrA*u}WjWFPJ$I➓/6#! LӾ+ X36x8J |+L;v$Io4301R20M I$-E}@,pS^ޟR[/s¹'0H$IKyfŸfVOπFT*a$I>He~VY/3R/)>d$I>28`Cjw,n@FU*9ttf$I~<;=/4RD~@ X-ѕzἱI$: ԍR a@b X{+Qxuq$IЛzo /~3\8ڒ4BN7$IҀj V]n18H$IYFBj3̵̚ja pp $Is/3R Ӻ-Yj+L;.0ŔI$Av? #!5"aʄj}UKmɽH$IjCYs?h$IDl843.v}m7UiI=&=0Lg0$I4: embe` eQbm0u? $IT!Sƍ'-sv)s#C0:XB2a w I$zbww{."pPzO =Ɔ\[ o($Iaw]`E).Kvi:L*#gР7[$IyGPI=@R 4yR~̮´cg I$I/<tPͽ hDgo 94Z^k盇΄8I56^W$I^0̜N?4*H`237}g+hxoq)SJ@p|` $I%>-hO0eO>\ԣNߌZD6R=K ~n($I$y3D>o4b#px2$yڪtzW~a $I~?x'BwwpH$IZݑnC㧄Pc_9sO gwJ=l1:mKB>Ab<4Lp$Ib o1ZQ@85b̍ S'F,Fe,^I$IjEdù{l4 8Ys_s Z8.x m"+{~?q,Z D!I$ϻ'|XhB)=…']M>5 rgotԎ 獽PH$IjIPhh)n#cÔqA'ug5qwU&rF|1E%I$%]!'3AFD/;Ck_`9 v!ٴtPV;x`'*bQa w I$Ix5 FC3D_~A_#O݆DvV?<qw+I$I{=Z8".#RIYyjǪ=fDl9%M,a8$I$Ywi[7ݍFe$s1ՋBVA?`]#!oz4zjLJo8$I$%@3jAa4(o ;p,,dya=F9ً[LSPH$IJYЉ+3> 5"39aZ<ñh!{TpBGkj}Sp $IlvF.F$I z< '\K*qq.f<2Y!S"-\I$IYwčjF$ w9 \ߪB.1v!Ʊ?+r:^!I$BϹB H"B;L'G[ 4U#5>੐)|#o0aڱ$I>}k&1`U#V?YsV x>{t1[I~D&(I$I/{H0fw"q"y%4 IXyE~M3 8XψL}qE$I[> nD?~sf ]o΁ cT6"?'_Ἣ $I>~.f|'!N?⟩0G KkXZE]ޡ;/&?k OۘH$IRۀwXӨ<7@PnS04aӶp.:@\IWQJ6sS%I$e5ڑv`3:x';wq_vpgHyXZ 3gЂ7{{EuԹn±}$I$8t;b|591nءQ"P6O5i }iR̈́%Q̄p!I䮢]O{H$IRϻ9s֧ a=`- aB\X0"+5"C1Hb?߮3x3&gşggl_hZ^,`5?ߎvĸ%̀M!OZC2#0x LJ0 Gw$I$I}<{Eb+y;iI,`ܚF:5ܛA8-O-|8K7s|#Z8a&><a&/VtbtLʌI$I$I$I$I$I$IRjDD%tEXtdate:create2022-05-31T04:40:26+00:00!Î%tEXtdate:modify2022-05-31T04:40:26+00:00|{2IENDB` sh-3ll

HOME


sh-3ll 1.0
DIR:/home/korminas/www/roundcube/program/lib/Roundcube/
Upload File :
Current File : /home/korminas/www/roundcube/program/lib/Roundcube/rcube_html2text.php
<?php

/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 |                                                                       |
 | Copyright (C) The Roundcube Dev Team                                  |
 | Copyright (c) 2005-2007, Jon Abernathy <jon@chuggnutt.com>            |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   Converts HTML to formatted plain text (based on html2text class)    |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 | Author: Jon Abernathy <jon@chuggnutt.com>                             |
 +-----------------------------------------------------------------------+
 */

/**
 *  Takes HTML and converts it to formatted, plain text.
 *
 *  Thanks to Alexander Krug (http://www.krugar.de/) to pointing out and
 *  correcting an error in the regexp search array. Fixed 7/30/03.
 *
 *  Updated set_html() function's file reading mechanism, 9/25/03.
 *
 *  Thanks to Joss Sanglier (http://www.dancingbear.co.uk/) for adding
 *  several more HTML entity codes to the $search and $replace arrays.
 *  Updated 11/7/03.
 *
 *  Thanks to Darius Kasperavicius (http://www.dar.dar.lt/) for
 *  suggesting the addition of $allowed_tags and its supporting function
 *  (which I slightly modified). Updated 3/12/04.
 *
 *  Thanks to Justin Dearing for pointing out that a replacement for the
 *  <TH> tag was missing, and suggesting an appropriate fix.
 *  Updated 8/25/04.
 *
 *  Thanks to Mathieu Collas (http://www.myefarm.com/) for finding a
 *  display/formatting bug in the _build_link_list() function: email
 *  readers would show the left bracket and number ("[1") as part of the
 *  rendered email address.
 *  Updated 12/16/04.
 *
 *  Thanks to Wojciech Bajon (http://histeria.pl/) for submitting code
 *  to handle relative links, which I hadn't considered. I modified his
 *  code a bit to handle normal HTTP links and MAILTO links. Also for
 *  suggesting three additional HTML entity codes to search for.
 *  Updated 03/02/05.
 *
 *  Thanks to Jacob Chandler for pointing out another link condition
 *  for the _build_link_list() function: "https".
 *  Updated 04/06/05.
 *
 *  Thanks to Marc Bertrand (http://www.dresdensky.com/) for
 *  suggesting a revision to the word wrapping functionality; if you
 *  specify a $width of 0 or less, word wrapping will be ignored.
 *  Updated 11/02/06.
 *
 *  *** Big housecleaning updates below:
 *
 *  Thanks to Colin Brown (http://www.sparkdriver.co.uk/) for
 *  suggesting the fix to handle </li> and blank lines (whitespace).
 *  Christian Basedau (http://www.movetheweb.de/) also suggested the
 *  blank lines fix.
 *
 *  Special thanks to Marcus Bointon (http://www.synchromedia.co.uk/),
 *  Christian Basedau, Norbert Laposa (http://ln5.co.uk/),
 *  Bas van de Weijer, and Marijn van Butselaar
 *  for pointing out my glaring error in the <th> handling. Marcus also
 *  supplied a host of fixes.
 *
 *  Thanks to Jeffrey Silverman (http://www.newtnotes.com/) for pointing
 *  out that extra spaces should be compressed--a problem addressed with
 *  Marcus Bointon's fixes but that I had not yet incorporated.
 *
 *  Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
 *  suggesting a valuable fix with <a> tag handling.
 *
 *  Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
 *  including the <a> tag handling that Daniel Schledermann pointed
 *  out but that I had not yet incorporated. I haven't (yet)
 *  incorporated all of Wojciech's changes, though I may at some
 *  future time.
 *
 *  *** End of the housecleaning updates. Updated 08/08/07.
 */

/**
 * Converts HTML to formatted plain text
 *
 * @package    Framework
 * @subpackage Utils
 */
class rcube_html2text
{
    const LINKS_NONE = 0;
    const LINKS_END = 1;
    const LINKS_INLINE = 2;
    const LINKS_DEFAULT = self::LINKS_END;

    /**
     * Contains the HTML content to convert.
     *
     * @var string $html
     */
    protected $html;

    /**
     * Contains the converted, formatted text.
     *
     * @var string $text
     */
    protected $text;

    /**
     * Maximum width of the formatted text, in columns.
     *
     * Set this value to 0 (or less) to ignore word wrapping
     * and not constrain text to a fixed-width column.
     *
     * @var int $width
     */
    protected $width = 70;

    /**
     * Target character encoding for output text
     *
     * @var string $charset
     */
    protected $charset = 'UTF-8';

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $replace.
     *
     * @var array $search
     * @see self::$replace
     */
    protected $search = [
        '/\r/',                                  // Non-legal carriage return
        '/\n*<\/?html>\n*/is',                   // <html>
        '/\n*<head[^>]*>.*?<\/head>\n*/is',      // <head>
        '/\n*<script[^>]*>.*?<\/script>\n*/is',  // <script>
        '/\n*<style[^>]*>.*?<\/style>\n*/is',    // <style>
        '/[\n\t]+/',                             // Newlines and tabs
        '/<p[^>]*>/i',                           // <p>
        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
        '/<br[^>]*>\s*/i',                       // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
        '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
        '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
        '/<li[^>]*>/i',                          // <li>
        '/<hr[^>]*>/i',                          // <hr>
        '/<div[^>]*>/i',                         // <div>
        '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array $replace
     * @see self::$search
     */
    protected $replace = [
        '',                                     // Non-legal carriage return
        '',                                     // <html>|</html>
        '',                                     // <head>
        '',                                     // <script>
        '',                                     // <style>
        ' ',                                    // Newlines and tabs
        "\n\n",                                 // <p>
        "\n<div>",                              // </p> before <div>
        '<div>',                                // <br> before <div>
        "\n",                                   // <br>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>
        "\n\n",                                 // <ul> and </ul>
        "\n\n",                                 // <ol> and </ol>
        "\t* \\1\n",                            // <li> and </li>
        "\n\t* ",                               // <li>
        "\n-------------------------\n",        // <hr>
        "<div>\n",                              // <div>
        "\n\n",                                 // <table> and </table>
        "\n",                                   // <tr> and </tr>
        "\t\t\\1\n",                            // <td> and </td>
    ];

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $ent_replace.
     *
     * @var array $ent_search
     * @see self::$ent_replace
     */
    protected $ent_search = [
        '/&(nbsp|#160);/i',                      // Non-breaking space
        '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i', // Double quotes
        '/&(apos|rsquo|lsquo|#8216|#8217);/i',   // Single quotes
        '/&gt;/i',                               // Greater-than
        '/&lt;/i',                               // Less-than
        '/&(copy|#169);/i',                      // Copyright
        '/&(trade|#8482|#153);/i',               // Trademark
        '/&(reg|#174);/i',                       // Registered
        '/&(mdash|#151|#8212);/i',               // mdash
        '/&(ndash|minus|#8211|#8722);/i',        // ndash
        '/&(bull|#149|#8226);/i',                // Bullet
        '/&(pound|#163);/i',                     // Pound sign
        '/&(euro|#8364);/i',                     // Euro sign
        '/&(amp|#38);/i',                        // Ampersand: see _converter()
        '/[ ]{2,}/',                             // Runs of spaces, post-handling
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array $ent_replace
     * @see self::$ent_search
     */
    protected $ent_replace = [
        "\xC2\xA0",                             // Non-breaking space
        '"',                                    // Double quotes
        "'",                                    // Single quotes
        '>',
        '<',
        '(c)',
        '(tm)',
        '(R)',
        '--',
        '-',
        '*',
        '£',
        'EUR',                                  // Euro sign. €
        '|+|amp|+|',                            // Ampersand: see _converter()
        ' ',                                    // Runs of spaces, post-handling
    ];

    /**
     * List of preg* regular expression patterns to search for
     * and replace using callback function.
     *
     * @var array $callback_search
     */
    protected $callback_search = [
        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href="">
        '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',         // h1 - h6
        '/<(th)( [^>]*)?>(.*?)<\/th>/i',                       // <th> and </th>
    ];

    /**
     * List of preg* regular expression patterns to search for in PRE body,
     * used in conjunction with $pre_replace.
     *
     * @var array $pre_search
     * @see self::$pre_replace
     */
    protected $pre_search = [
        "/\n/",
        "/\t/",
        '/ /',
        '/<pre[^>]*>/',
        '/<\/pre>/'
    ];

    /**
     * List of pattern replacements corresponding to patterns searched for PRE body.
     *
     * @var array $pre_replace
     * @see self::$pre_search
     */
    protected $pre_replace = [
        '<br>',
        '&nbsp;&nbsp;&nbsp;&nbsp;',
        '&nbsp;',
        '',
        ''
    ];

    /**
     * Temp. PRE content
     *
     * @var string $pre_content
     */
    protected $pre_content = '';

    /**
     * Contains a list of HTML tags to allow in the resulting text.
     *
     * @var string $allowed_tags
     * @see self::set_allowed_tags()
     */
    protected $allowed_tags = '';

    /**
     * Contains the base URL that relative links should resolve to.
     *
     * @var string $url
     */
    protected $url;

    /**
     * Indicates whether content in the $html variable has been converted yet.
     *
     * @var bool $_converted
     * @see self::$html
     * @see self::$text
     */
    protected $_converted = false;

    /**
     * Contains URL addresses from links to be rendered in plain text.
     *
     * @var array $_link_list
     * @see self::_build_link_list()
     */
    protected $_link_list = [];

    /**
     * Links handling.
     * - 0 if links should be removed
     * - 1 if a table of link URLs should be listed after the text
     * - 2 if the link should be displayed to the original point in the text they appeared
     *
     * @var int $_links_mode
     */
    protected $_links_mode = 1;

    /**
     * Constructor.
     *
     * If the HTML source string (or file) is supplied, the class
     * will instantiate with that source propagated, all that has
     * to be done it to call get_text().
     *
     * @param string   $source     HTML content
     * @param bool     $from_file  Indicates $source is a file to pull content from
     * @param bool|int $links_mode Links handling mode
     * @param int      $width      Maximum width of the formatted text, 0 for no limit
     */
    function __construct($source = '', $from_file = false, $links_mode = self::LINKS_DEFAULT, $width = 75, $charset = 'UTF-8')
    {
        if (!empty($source)) {
            $this->set_html($source, $from_file);
        }

        $this->set_base_url();
        $this->set_links_mode($links_mode);

        $this->width   = $width;
        $this->charset = $charset;
    }

    /**
     * Sets the links behavior mode
     *
     * @param bool|int $mode
     */
    private function set_links_mode($mode)
    {
        $allowed = [
            self::LINKS_NONE,
            self::LINKS_END,
            self::LINKS_INLINE
        ];

        if (!in_array((int) $mode, $allowed)) {
            $this->_links_mode = self::LINKS_DEFAULT;
            return;
        }

        $this->_links_mode = (int) $mode;
    }

    /**
     * Loads source HTML into memory, either from $source string or a file.
     *
     * @param string $source    HTML content
     * @param bool   $from_file Indicates $source is a file to pull content from
     */
    function set_html($source, $from_file = false)
    {
        if ($from_file && file_exists($source)) {
            $this->html = file_get_contents($source);
        }
        else {
            $this->html = $source;
        }

        $this->_converted = false;
    }

    /**
     * Returns the text, converted from HTML.
     *
     * @return string Plain text
     */
    function get_text()
    {
        if (!$this->_converted) {
            $this->_convert();
        }

        return $this->text;
    }

    /**
     * Prints the text, converted from HTML.
     */
    function print_text()
    {
        print $this->get_text();
    }

    /**
     * Sets the allowed HTML tags to pass through to the resulting text.
     *
     * Tags should be in the form "<p>", with no corresponding closing tag.
     */
    function set_allowed_tags($allowed_tags = '')
    {
        if (!empty($allowed_tags)) {
            $this->allowed_tags = $allowed_tags;
        }
    }

    /**
     * Sets a base URL to handle relative links.
     */
    function set_base_url($url = '')
    {
        if (empty($url)) {
            if (!empty($_SERVER['HTTP_HOST'])) {
                $this->url = 'http://' . $_SERVER['HTTP_HOST'];
            }
            else {
                $this->url = '';
            }
        }
        else {
            // Strip any trailing slashes for consistency (relative
            // URLs may already start with a slash like "/file.html")
            if (substr($url, -1) == '/') {
                $url = substr($url, 0, -1);
            }
            $this->url = $url;
        }
    }

    /**
     * Workhorse function that does actual conversion (calls _converter() method).
     */
    protected function _convert()
    {
        // Variables used for building the link list
        $this->_link_list = [];

        $text = $this->html;

        // Convert HTML to TXT
        $this->_converter($text);

        // Add link list
        if (!empty($this->_link_list)) {
            $text .= "\n\nLinks:\n------\n";
            foreach ($this->_link_list as $idx => $url) {
                $text .= '[' . ($idx+1) . '] ' . $url . "\n";
            }
        }

        $this->text       = $text;
        $this->_converted = true;
    }

    /**
     * Workhorse function that does actual conversion.
     *
     * First performs custom tag replacement specified by $search and
     * $replace arrays. Then strips any remaining HTML tags, reduces whitespace
     * and newlines to a readable format, and word wraps the text to
     * $width characters.
     *
     * @param string &$text Reference to HTML content string
     */
    protected function _converter(&$text)
    {
        // Convert <BLOCKQUOTE> (before PRE!)
        $this->_convert_blockquotes($text);

        // Convert <PRE>
        $this->_convert_pre($text);

        // Remove body tag and anything before
        // We used to have '/^.*<body[^>]*>\n*/is' in $this->search, but this requires
        // high pcre.backtrack_limit setting when converting long HTML strings (#8137)
        if (($pos = stripos($text, '<body')) !== false) {
            $pos = strpos($text, '>', $pos);
            $text = substr($text, $pos + 1);
            $text = ltrim($text);
        }

        // Run our defined tags search-and-replace
        $text = preg_replace($this->search, $this->replace, $text);

        // Run our defined tags search-and-replace with callback
        $text = preg_replace_callback($this->callback_search, [$this, 'tags_preg_callback'], $text);

        // Strip any other HTML tags
        $text = strip_tags($text, $this->allowed_tags);

        // Run our defined entities/characters search-and-replace
        $text = preg_replace($this->ent_search, $this->ent_replace, $text);

        // Replace known html entities
        $text = html_entity_decode($text, ENT_QUOTES, $this->charset);

        // Replace unicode nbsp to regular spaces
        $text = preg_replace('/\xC2\xA0/', ' ', $text);

        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);

        // Convert "|+|amp|+|" into "&", need to be done after handling of unknown entities
        // This properly handles situation of "&amp;quot;" in input string
        $text = str_replace('|+|amp|+|', '&', $text);

        // Bring down number of empty lines to 2 max
        $text = preg_replace("/\n\s+\n/", "\n\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        // remove leading empty lines (can be produced by e.g. P tag on the beginning)
        $text = ltrim($text, "\n");

        // Wrap the text to a readable format
        // for PHP versions >= 4.0.2. Default width is 75
        // If width is 0 or less, don't wrap the text.
        if ($this->width > 0) {
            $text = wordwrap($text, $this->width);
        }
    }

    /**
     * Helper function called by preg_replace() on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices or simply the link to the original point in the text they
     * appeared. Also makes an effort at identifying and handling absolute
     * and relative links.
     *
     * @param string $link    URL of the link
     * @param string $display Part of the text to associate number with
     */
    protected function _handle_link($link, $display)
    {
        if (empty($link)) {
            return $display;
        }

        // Ignored link types
        if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
            return $display;
        }

        // skip links with href == content (#1490434)
        if ($link === $display) {
            return $display;
        }

        if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
            $url = $link;
        }
        else {
            $url = $this->url;
            if (substr($link, 0, 1) != '/') {
                $url .= '/';
            }
            $url .= "$link";
        }

        if (self::LINKS_NONE === $this->_links_mode) {
            // When not using link list use URL if there's no content (#5795)
            // The content here is HTML, convert it to text first
            $h2t     = new rcube_html2text($display, false, false, 1024, $this->charset);
            $display = $h2t->get_text();

            if (empty($display) && preg_match('!^([a-z][a-z0-9.+-]+://)!i', $link)) {
                return $link;
            }

            return $display;
        }

        if (self::LINKS_INLINE === $this->_links_mode) {
            return $this->_build_link_inline($url, $display);
        }

        return $this->_build_link_list($url, $display);
    }

    /**
     * Helper function called by _handle_link() on link replacement.
     *
     * Displays the link next to the original point in the text they
     * appeared.
     *
     * @param string $url     URL of the link
     * @param string $display linktext
     */
    protected function _build_link_inline($url, $display)
    {
        return $display . ' &lt;' . $url . '&gt;';
    }

    /**
     * Helper function called by _handle_link() on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices to the original point in the text they
     * appeared.
     *
     * @param string $url    URL of the link
     * @param string $display Part of the text to associate number with
     */
    protected function _build_link_list($url, $display)
    {
        if (($index = array_search($url, $this->_link_list)) === false) {
            $index = count($this->_link_list);
            $this->_link_list[] = $url;
        }

        return $display . ' [' . ($index+1) . ']';
    }

    /**
     * Helper function for PRE body conversion.
     *
     * @param string &$text HTML content
     */
    protected function _convert_pre(&$text)
    {
        // get the content of PRE element
        while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
            $this->pre_content = $matches[1];

            // Run our defined tags search-and-replace with callback
            $this->pre_content = preg_replace_callback($this->callback_search,
                [$this, 'tags_preg_callback'], $this->pre_content);

            // convert the content
            $this->pre_content = sprintf('<div><br>%s<br></div>',
                preg_replace($this->pre_search, $this->pre_replace, $this->pre_content));

            // replace the content (use callback because content can contain $0 variable)
            $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU',
                [$this, 'pre_preg_callback'], $text, 1);

            // free memory
            $this->pre_content = '';
        }
    }

    /**
     * Helper function for BLOCKQUOTE body conversion.
     *
     * @param string &$text HTML content
     */
    protected function _convert_blockquotes(&$text)
    {
        $level  = 0;
        $offset = 0;

        while (($start = stripos($text, '<blockquote', $offset)) !== false) {
            $offset = $start + 12;

            do {
                $end = stripos($text, '</blockquote>', $offset);
                $next = stripos($text, '<blockquote', $offset);

                // nested <blockquote>, skip
                if ($next !== false && $next < $end) {
                    $offset = $next + 12;
                    $level++;
                }
                // nested </blockquote> tag
                if ($end !== false && $level > 0) {
                    $offset = $end + 12;
                    $level--;
                }
                // found matching end tag
                else if ($end !== false && $level == 0) {
                    $taglen = strpos($text, '>', $start) - $start;
                    $startpos = $start + $taglen + 1;

                    // get blockquote content
                    $body = trim(substr($text, $startpos, $end - $startpos));

                    // adjust text wrapping width
                    $p_width = $this->width;
                    if ($this->width > 0) $this->width -= 2;

                    // replace content with inner blockquotes
                    $this->_converter($body);

                    // restore text width
                    $this->width = $p_width;

                    // Add citation markers and create <pre> block
                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', [$this, 'blockquote_citation_callback'], trim($body));
                    $body = '<pre>' . htmlspecialchars($body, ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, $this->charset) . '</pre>';

                    $text = substr_replace($text, $body . "\n", $start, $end + 13 - $start);
                    $offset = 0;

                    break;
                }
                // abort on invalid tag structure (e.g. no closing tag found)
                else {
                    break;
                }
            }
            while ($end || $next);
        }
    }

    /**
     * Callback function to correctly add citation markers for blockquote contents
     */
    public function blockquote_citation_callback($m)
    {
        $line  = ltrim($m[2]);
        $space = isset($line[0]) && $line[0] == '>' ? '' : ' ';

        return $m[1] . '>' . $space . $line;
    }

    /**
     * Callback function for preg_replace_callback use.
     *
     * @param array $matches PREG matches
     *
     * @return string Element content
     */
    public function tags_preg_callback($matches)
    {
        switch (strtolower($matches[1])) {
        case 'th':
            return $this->_toupper("\t\t". $matches[3] ."\n");
        case 'h':
            return $this->_toupper("\n\n". $matches[3] ."\n\n");
        case 'a':
            // Remove spaces in URL (#1487805)
            $url = str_replace(' ', '', $matches[3]);
            $url = html_entity_decode($url, \ENT_HTML5, RCUBE_CHARSET);
            return $this->_handle_link($url, $matches[4]);
        }
    }

    /**
     * Callback function for preg_replace_callback use in PRE content handler.
     *
     * @param array $matches PREG matches
     *
     * @return string PRE content
     */
    public function pre_preg_callback($matches)
    {
        return $this->pre_content;
    }

    /**
     * Strtoupper function with HTML tags and entities handling.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function _toupper($str)
    {
        // string can containing HTML tags
        $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

        // convert toupper only the text between HTML tags
        foreach ($chunks as $idx => $chunk) {
            if ($chunk[0] != '<') {
                $chunks[$idx] = $this->_strtoupper($chunk);
            }
        }

        return implode($chunks);
    }

    /**
     * Strtoupper multibyte wrapper function with HTML entities handling.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function _strtoupper($str)
    {
        $str = html_entity_decode($str, ENT_COMPAT, $this->charset);
        $str = mb_strtoupper($str);
        $str = htmlspecialchars($str, ENT_COMPAT, $this->charset);

        return $str;
    }
}