content.gsub - Parse HTML When Reading Atom Feeds

Atom 피드를 읽을 때 HTML을 변환

# Undoes the angle-bracket escaping in feed content and removes every
# backslash, editing the given string in place.
#
# content - a mutable String; it is modified destructively.
#
# Returns the same String object that was passed in.
def rss_html_parse(content)
    # Applied strictly in this order, one pass per pair.
    [["&lt;", "<"], ["&gt;", ">"], ["\\", ""]].each do |escaped, plain|
        content.gsub!(escaped, plain)
    end
    content
end


아래의 코드도 참고하면 좋을 듯합니다.

http://www.koders.com/ruby/fid16C1CE4DCC34F06BDECCAF265080FD2E60334D31.aspx

#
# $Revision: 1.4 $, $Date: 2007/03/10 04:23:47 $
#
# This is free software with ABSOLUTELY NO WARRANTY.
# You can redistribute it and/or modify it under GPL2.
#

=begin
S_TABLE = {\
	'whisper' => 'ϵ',
	'think' => '',
	'groan' => '',
	'ring' => '',
}
=end
# Font colours looked up by mb_convert when it wraps each <hr>-terminated
# chunk of output in a <font> element.
# NOTE(review): three of the four keys below are identical empty strings, so
# Ruby keeps only the last of those entries ('green'). This looks like
# non-ASCII key text lost in an encoding conversion -- confirm against the
# upstream source before relying on this table.
C_TABLE = {\
	'ϵ' => 'red',
	'' => 'gray',
	'' => 'darkblue',
	'' => 'green',
}

# Hexadecimal character references and the whitespace character each one
# stands for. unescapeAttr replaces every key found in its input with the
# corresponding value. Frozen so the shared table cannot be altered at
# runtime.
AttrTable = {
	'&#X09;' => "\t",
	'&#X0A;' => "\n",
	'&#X0C;' => "\f",
	'&#X0D;' => "\r",
	'&#X20;' => " "
}.freeze

# Terminates a message fragment with a horizontal rule.
#
# str - the fragment; it is not modified.
#
# Returns the result of str + '<hr>'.
def mb_message(str)
	rule = '<hr>'
	str + rule
end

# Rewrites a chunk of log HTML in place into a flatter form: img, table,
# span, div, tr and td tags are stripped, some closing tags become <hr>
# separators, links are rewritten, and every <hr>-terminated chunk is
# wrapped in a <font> element.
#
# content - String of HTML; modified destructively via gsub!.
# href    - interpolated as the base of every rewritten
#           "<a class ... href=...#anchor>" link.
# d       - interpolated as the "date" query value when the original link
#           carries no "&date=NNN" of its own.
#
# Returns the same +content+ object.
#
# The substitutions are order-dependent (the specific <div class=...>
# patterns must run before the catch-all <div> removal), so the sequence
# must be kept intact.
def mb_convert(content, href, d)
	content.gsub!(/^<\/div><\/div>$/, '<hr>')
	content.gsub!(/<img.*?>/, '')
	content.gsub!(/<table.*?>/, '')
	content.gsub!(/<\/table>/, '<hr>')
	content.gsub!(/<span.*?>/, '')
	content.gsub!(/<div class="announce">(.*?)<\/div>/, '\1<hr>')
	content.gsub!(/<div class="extra">(.*?)<\/div>/, '\1<hr>')
	content.gsub!(/<div class="mes_([^>]+)_body1">(.*?)<\/div>/) {
		kind, text = $1, $2
		if kind == 'say'
			"<br>#{text}"
		else
			# S_TABLE is commented out in this file, so referencing it
			# unconditionally raised NameError for every non-"say" message.
			# Use it when it is defined somewhere; otherwise emit no prefix,
			# which is also what a missing S_TABLE key produces.
			prefix = defined?(S_TABLE) ? S_TABLE[kind] : nil
			"#{prefix}\n<br>#{text}"
		end
	}
	content.gsub!(/<div.*?>/, '')
	content.gsub!(/<\/(div|span|td|tr)>/, '')
	content.gsub!(/<(tr|td).*?>/, '')
	# In-page anchors become named targets.
	content.gsub!(/<a href="#(.*?)" class="anchor">(\(.?\d+\))<\/a>/, '<a name="\1">\2</a>')
	# NOTE(review): "()" here is an empty capture group, not literal
	# parentheses, so any handler ending in ;"> is removed. Left unchanged.
	content.gsub!(/ onmouseover=".*?();">/, '>')
	# Point anchor links back at +href+, reusing the link's own date when it
	# has one and falling back to +d+ otherwise.
	content.gsub!(/<a class.*? href=".*?(&date=(\d+))*#(.*?)">/) {
		if $1
			%Q(<a href="#{href}&date=#{$2}&href=#{$3}##{$3}">)
		else
			%Q(<a href="#{href}&date=#{d}&href=#{$3}##{$3}">)
		end
	}
	content.gsub!(/<a href="matome.cgi\?.*?">(.*?)<\/a>/, '\1')
	# NOTE(review): in the next two patterns both groups are lazy with nothing
	# between them, so the first group always matches the empty string. The
	# first substitution therefore leaves each line as it was and the second
	# always looks up C_TABLE['']. Literal characters were presumably lost
	# from these patterns -- confirm against the upstream source.
	content.gsub!(/^(.*?)(.*?)$/, '\2\1')
	content.gsub!(/(.*?)(.*?)<hr>/m) {
		%Q(<font color="#{C_TABLE[$1]}">#{$1}#{$2}</font><hr>)
	}
	content.gsub!(/<hr>/, "<hr>\n")
	content
end

# from vikiwiki for w3m input hidden line feed problem
# Replaces every whitespace character in +value+ with "&amp;#XNN;", where
# NN is the character's code point as two-digit uppercase hexadecimal.
#
# value - the String to escape; it is not modified.
#
# Returns a new String.
def escapeAttr(value)
	# String#[] yields a one-character String on Ruby 1.9+, which %X cannot
	# format, so take the code point explicitly with String#ord.
	value.gsub(/\s/) { |space| format('&amp;#X%02X;', space.ord) }
end

# Expands the character references listed in AttrTable back into the
# characters they stand for.
#
# value - the String to decode; it is not modified.
#
# Returns a decoded copy of +value+.
def unescapeAttr(value)
	value.dup.tap do |decoded|
		AttrTable.each_pair { |reference, char| decoded.gsub!(reference, char) }
	end
end

아... 머리 아프다.

댓글 쓰기