class Net::HTTPResponse
Attributes
no_cache[RW]
Public Instance Methods
body_charset(str=self.raw_body)
click to toggle source
# File lib/rbot/httputil.rb, line 32
# Collect the candidate character sets for a response body.
#
# Candidates are gathered, in this order, from:
# * a hard-coded ISO-8859-1 fallback,
# * the charset parameter of the Content-Type header,
# * an XML processing instruction, or else an HTML http-equiv
#   Content-Type meta tag, found in +str+.
#
# str:: the body text to inspect (defaults to raw_body). It is only read:
#       pattern matching runs on a scrubbed copy, so the caller's bytes and
#       encoding tag are left untouched. May be nil, in which case only the
#       header is consulted.
#
# Returns the de-duplicated list of candidates, or nil when the
# Content-Type is neither text/* nor an XML/XHTML type.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['ISO-8859-1'] # should be in config

  if ctype =~ /charset=["']?([^\s"']+)["']?/i
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  # Without a body there is nothing more to sniff.
  return charsets.uniq if str.nil?

  # str might be invalid utf-8 that would crash the pattern match, so match
  # against a scrubbed COPY. Scrubbing in place would replace every
  # non-UTF-8 byte with U+FFFD and make the body undecodable afterwards.
  probe = str.dup.force_encoding('UTF-8').encode('UTF-8', 'UTF-8', :invalid => :replace)

  case probe
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # Case-insensitive, like the header match above.
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  end

  charsets.uniq
end
body_to_utf(str)
click to toggle source
# File lib/rbot/httputil.rb, line 59
# Convert a response body to UTF-8.
#
# The candidate charsets reported by body_charset are tried from last to
# first; the first one that can be applied and transcoded wins. A charset
# that cannot be used is reported through error and the next one is tried.
#
# str:: the body to convert. It is never modified: both charset detection
#       and decoding work on copies, so callers can keep appending raw
#       bytes to it.
#
# Returns the UTF-8 text, or +str+ itself when it is nil, when body_charset
# returns nil, or when no candidate charset could be used.
def body_to_utf(str)
  return str if str.nil?

  # Detection gets its own copy so that the caller's bytes stay intact
  # whatever body_charset does with its argument.
  charsets = self.body_charset(str.dup)
  return str unless charsets

  charsets.reverse_each do |charset|
    begin
      debug "try decoding using #{charset}"
      tagged = str.dup.force_encoding(charset)
      # Round-trip through UTF-16le so that invalid sequences are dropped.
      return tagged.encode('UTF-16le', :invalid => :replace, :replace => '').encode('UTF-8')
    rescue
      error 'failed to use encoding'
      error $!
    end
  end

  str
end
cooked_body()
click to toggle source
# File lib/rbot/httputil.rb, line 129
# Return the response body prepared for text processing: raw_body is passed
# through decompress_body and the result through body_to_utf.
def cooked_body
  unpacked = decompress_body(raw_body)
  body_to_utf(unpacked)
end
decompress_body(str)
click to toggle source
# File lib/rbot/httputil.rb, line 80
# Undo the Content-Encoding of a response body.
#
# str:: the (possibly partial) encoded body.
#
# Behaviour by Content-Encoding header value (matched case-insensitively):
# * missing, empty or "identity": +str+ is returned as is.
# * anything containing "gzip": the body is gunzipped; if that fails (for
#   instance on a partial read) as many bytes as possible are recovered.
# * "deflate": the body is inflated, accepting both raw deflate streams
#   and zlib-wrapped ones.
# * a charset name (iso-8859-N, windows-N, utf-8, utf8): the server mixed
#   up charset and content encoding; the charset is moved into the
#   Content-Type header and +str+ is returned unchanged.
#
# Raises Zlib::Error when a deflate body cannot be inflated, and
# RuntimeError for any other content encoding.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil, '', /\Aidentity\z/i
    return str
  when /gzip/i # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # If we can't unpack the whole stream (e.g. because we're doing a
      # partial read), pull out bytes one at a time until the reader gives up.
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      ret = String.new
      ret.force_encoding(Encoding::ASCII_8BIT)
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte| ret << byte }
      rescue
        # Deliberately best effort: keep whatever was recovered so far.
      end
      return ret
    end
  when /\Adeflate\z/i
    debug "inflating body"
    inflate = lambda do |window_bits|
      inflater = Zlib::Inflate.new(window_bits)
      begin
        inflater.inflate(str)
      ensure
        inflater.close
      end
    end
    begin
      # -MAX_WBITS stops zlib from looking for a zlib header; many servers
      # send such raw deflate streams.
      return inflate.call(-Zlib::MAX_WBITS)
    rescue Zlib::Error => raw_error
      begin
        # The HTTP "deflate" coding is formally the zlib format, so retry
        # expecting the zlib header.
        return inflate.call(Zlib::MAX_WBITS)
      rescue Zlib::Error
        # Neither format worked: report the failure of the first attempt.
        raise raw_error
      end
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    ctype = self['content-type'] || 'text/html'
    suffix = "; charset=" + method.downcase
    # This method can run once per received chunk, so only patch the header
    # (and warn) the first time round.
    unless ctype.end_with?(suffix)
      old_hsh = self.to_hash
      self['content-type'] = ctype + suffix
      warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    end
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end
partial_body(size=0) { |body_to_utf(decompress_body)| ... }
click to toggle source
Read chunks from the body until we have at least size bytes, yielding the partial text at each chunk. Return the partial body.
# File lib/rbot/httputil.rb, line 135
# Read chunks from the body until at least +size+ bytes have been received,
# yielding the decoded partial text after each chunk.
#
# size:: number of raw bytes after which reading stops; 0 or nil reads the
#        whole body.
#
# If @read is set, the text is taken from body and yielded once.
# Otherwise no_cache is set to true and the body is streamed with
# read_body.
#
# Returns the partial body, decompressed and converted to UTF-8.
def partial_body(size=0, &block)
  partial = String.new

  # Decode a COPY of the raw bytes: the decoders may retag or scrub the
  # string they receive, which would corrupt the buffer that later chunks
  # are appended to (or the cached body).
  decode = lambda { self.body_to_utf(self.decompress_body(partial.dup)) }

  if @read
    debug "using body() as partial"
    partial = self.body
    yield decode.call if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body do |chunk|
      partial << chunk
      yield decode.call if block_given?
      break if size && size > 0 && partial.bytesize >= size
    end
  end

  decode.call
end
to_json()
click to toggle source
# File lib/rbot/httputil.rb, line 161
# Parse the response body as JSON and return the resulting Ruby object.
# Note that this decodes the body; it does not serialize the response.
def to_json
  payload = body
  JSON.parse(payload)
end
xpath(path)
click to toggle source
# File lib/rbot/httputil.rb, line 156
# Parse the response body as HTML with Nokogiri and evaluate an XPath
# expression against the resulting document.
#
# path:: the XPath expression to evaluate.
#
# Returns whatever the document's xpath call returns for +path+.
def xpath(path)
  Nokogiri::HTML.parse(body).xpath(path)
end