diff --git a/contrib/epee/include/net/http_client.h b/contrib/epee/include/net/http_client.h index 515da64a..76d823b9 100644 --- a/contrib/epee/include/net/http_client.h +++ b/contrib/epee/include/net/http_client.h @@ -38,9 +38,9 @@ #include "net_helper.h" #include "http_client_base.h" -#ifdef HTTP_ENABLE_GZIP +//#ifdef HTTP_ENABLE_GZIP #include "gzip_encoding.h" -#endif +//#endif #include "string_tools.h" #include "reg_exp_definer.h" @@ -57,827 +57,1002 @@ extern epee::critical_section gregexp_lock; namespace epee { -namespace net_utils -{ - -using namespace std; - - /*struct url - { - public: - void parse(const std::string& url_s) - { - const string prot_end("://"); - string::const_iterator prot_i = search(url_s.begin(), url_s.end(), - prot_end.begin(), prot_end.end()); - protocol_.reserve(distance(url_s.begin(), prot_i)); - transform(url_s.begin(), prot_i, - back_inserter(protocol_), - ptr_fun(tolower)); // protocol is icase - if( prot_i == url_s.end() ) - return; - advance(prot_i, prot_end.length()); - string::const_iterator path_i = find(prot_i, url_s.end(), '/'); - host_.reserve(distance(prot_i, path_i)); - transform(prot_i, path_i, - back_inserter(host_), - ptr_fun(tolower)); // host is icase - string::const_iterator query_i = find(path_i, url_s.end(), '?'); - path_.assign(path_i, query_i); - if( query_i != url_s.end() ) - ++query_i; - query_.assign(query_i, url_s.end()); - } - - std::string protocol_; - std::string host_; - std::string path_; - std::string query_; - };*/ - - - - - //--------------------------------------------------------------------------- - static inline const char* get_hex_vals() - { - static char hexVals[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; - return hexVals; - } - - static inline const char* get_unsave_chars() - { - //static char unsave_chars[] = "\"<>%\\^[]`+$,@:;/!#?=&"; - static char unsave_chars[] = "\"<>%\\^[]`+$,@:;!#&"; - return unsave_chars; - } - - static inline bool is_unsafe(unsigned char compare_char) - { - if(compare_char <= 32 || compare_char >= 123) - return true; - - const char* punsave = get_unsave_chars(); - - for(int ichar_pos = 0; 0!=punsave[ichar_pos] ;ichar_pos++) - if(compare_char == punsave[ichar_pos]) - return true; - - return false; - } - - static inline - std::string dec_to_hex(char num, int radix) - { - int temp=0; - std::string csTmp; - int num_char; - - num_char = (int) num; - if (num_char < 0) - num_char = 256 + num_char; - - while (num_char >= radix) - { - temp = num_char % radix; - num_char = (int)floor((float)num_char / (float)radix); - csTmp = get_hex_vals()[temp]; - } - - csTmp += get_hex_vals()[num_char]; - - if(csTmp.size() < 2) - { - csTmp += '0'; - } - - std::reverse(csTmp.begin(), csTmp.end()); - //_mbsrev((unsigned char*)csTmp.data()); - - return csTmp; - } - - static inline std::string convert(char val) - { - std::string csRet; - csRet += "%"; - csRet += dec_to_hex(val, 16); - return csRet; - } - static inline std::string conver_to_url_format(const std::string& uri) - { - - std::string result; - - for(size_t i = 0; i!= uri.size(); i++) - { - if(is_unsafe(uri[i])) - result += convert(uri[i]); - else - result += uri[i]; - - } - - return result; - } - - static inline std::string convert_to_url_format_force_all(const std::string& uri) + namespace net_utils { - std::string result; + using namespace std; - for(size_t i = 0; i!= uri.size(); i++) + /*struct url { - result += convert(uri[i]); + public: + void parse(const std::string& url_s) + { + const string prot_end("://"); + string::const_iterator prot_i = search(url_s.begin(), url_s.end(), + prot_end.begin(), prot_end.end()); + protocol_.reserve(distance(url_s.begin(), prot_i)); + transform(url_s.begin(), prot_i, + back_inserter(protocol_), + ptr_fun(tolower)); // protocol is icase + if( prot_i == url_s.end() ) + return; + advance(prot_i, prot_end.length()); + string::const_iterator path_i = find(prot_i, url_s.end(), '/'); + host_.reserve(distance(prot_i, path_i)); + transform(prot_i, path_i, + back_inserter(host_), + ptr_fun(tolower)); // host is icase + string::const_iterator query_i = find(path_i, url_s.end(), '?'); + path_.assign(path_i, query_i); + if( query_i != url_s.end() ) + ++query_i; + query_.assign(query_i, url_s.end()); } - return result; - } + std::string protocol_; + std::string host_; + std::string path_; + std::string query_; + };*/ + //--------------------------------------------------------------------------- + static inline const char* get_hex_vals() + { + static char hexVals[16] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' }; + return hexVals; + } - namespace http - { - template - class http_simple_client_t: public i_target_handler - { - public: - + static inline const char* get_unsave_chars() + { + //static char unsave_chars[] = "\"<>%\\^[]`+$,@:;/!#?=&"; + static char unsave_chars[] = "\"<>%\\^[]`+$,@:;!#&"; + return unsave_chars; + } - private: - enum reciev_machine_state - { - reciev_machine_state_header, - reciev_machine_state_body_content_len, - reciev_machine_state_body_connection_close, - reciev_machine_state_body_chunked, - reciev_machine_state_done, - reciev_machine_state_error - }; + static inline bool is_unsafe(unsigned char compare_char) + { + if (compare_char <= 32 || compare_char >= 123) + return true; + const char* punsave = get_unsave_chars(); + for (int ichar_pos = 0; 0 != punsave[ichar_pos]; ichar_pos++) + if (compare_char == punsave[ichar_pos]) + return true; - enum chunked_state{ - http_chunked_state_chunk_head, - http_chunked_state_chunk_body, - http_chunked_state_done, - http_chunked_state_undefined - }; + return false; + } + static inline + std::string dec_to_hex(char num, int radix) + { + int temp = 0; + std::string csTmp; + int num_char; - blocked_mode_client_t m_net_client; - std::string m_host_buff; - std::string m_port; - unsigned int m_timeout; - std::string m_header_cache; - http_response_info m_response_info; - size_t m_len_in_summary; - size_t m_len_in_remain; - //std::string* m_ptarget_buffer; - boost::shared_ptr m_pcontent_encoding_handler; - reciev_machine_state m_state; - chunked_state m_chunked_state; - std::string m_chunked_cache; - critical_section m_lock; + num_char = (int)num; + if (num_char < 0) + num_char = 256 + num_char; - public: - void set_host_name(const std::string& name) - { - CRITICAL_REGION_LOCAL(m_lock); - m_host_buff = name; - } - - //boost::asio::ip::tcp::socket& get_socket() - //{ - // return m_net_client.get_socket(); - //} - - - bool connect(const std::string& host, int port, unsigned int timeout) + while (num_char >= radix) { - return connect(host, std::to_string(port), timeout); + temp = num_char % radix; + num_char = (int)floor((float)num_char / (float)radix); + csTmp = get_hex_vals()[temp]; } - bool connect(const std::string& host, const std::string& port, unsigned int timeout) + + csTmp += get_hex_vals()[num_char]; + + if (csTmp.size() < 2) { - CRITICAL_REGION_LOCAL(m_lock); - m_host_buff = host; - m_port = port; - m_timeout = timeout; - - return m_net_client.connect(host, port, timeout, timeout); + csTmp += '0'; } - //--------------------------------------------------------------------------- - bool disconnect() - { - CRITICAL_REGION_LOCAL(m_lock); - return m_net_client.disconnect(); - } - //--------------------------------------------------------------------------- - bool is_connected() - { - CRITICAL_REGION_LOCAL(m_lock); - return m_net_client.is_connected(); - } - //--------------------------------------------------------------------------- - virtual bool handle_target_data(std::string& piece_of_transfer) - { - CRITICAL_REGION_LOCAL(m_lock); - m_response_info.m_body += piece_of_transfer; - piece_of_transfer.clear(); - return true; - } - //--------------------------------------------------------------------------- - inline - bool invoke_get(const std::string& uri, const std::string& body = std::string(), const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) - { - CRITICAL_REGION_LOCAL(m_lock); - return invoke(uri, "GET", body, ppresponse_info, additional_params); - } - //--------------------------------------------------------------------------- - inline bool invoke(const std::string& uri, const std::string& method, const std::string& body, const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) - { - CRITICAL_REGION_LOCAL(m_lock); - if(!is_connected()) - { - LOG_PRINT("Reconnecting...", LOG_LEVEL_3); - if(!connect(m_host_buff, m_port, m_timeout)) - { - LOG_PRINT("Failed to connect to " << m_host_buff << ":" << m_port, LOG_LEVEL_3); - return false; - } - } - m_response_info.clear(); - std::string req_buff = method + " "; - req_buff += uri + " HTTP/1.1\r\n" + - "Host: "+ m_host_buff +"\r\n" + "Content-Length: " + boost::lexical_cast(body.size()) + "\r\n"; + std::reverse(csTmp.begin(), csTmp.end()); + //_mbsrev((unsigned char*)csTmp.data()); + + return csTmp; + } + + static inline std::string convert(char val) + { + std::string csRet; + csRet += "%"; + csRet += dec_to_hex(val, 16); + return csRet; + } + static inline std::string conver_to_url_format(const std::string& uri) + { + + std::string result; + + for (size_t i = 0; i != uri.size(); i++) + { + if (is_unsafe(uri[i])) + result += convert(uri[i]); + else + result += uri[i]; + + } + + return result; + } + + static inline std::string convert_to_url_format_force_all(const std::string& uri) + { + + std::string result; + + for (size_t i = 0; i != uri.size(); i++) + { + result += convert(uri[i]); + } + + return result; + } - //handle "additional_params" - for(fields_list::const_iterator it = additional_params.begin(); it!=additional_params.end(); it++) - req_buff += it->first + ": " + it->second + "\r\n"; - req_buff += "\r\n"; - //-- - bool res = m_net_client.send(req_buff); - CHECK_AND_ASSERT_MES(res, false, "HTTP_CLIENT: Failed to SEND"); - if(body.size()) - res = m_net_client.send(body); - CHECK_AND_ASSERT_MES(res, false, "HTTP_CLIENT: Failed to SEND"); - if(ppresponse_info) - *ppresponse_info = &m_response_info; - m_state = reciev_machine_state_header; - return handle_reciev(); - } - //--------------------------------------------------------------------------- - inline bool invoke_post(const std::string& uri, const std::string& body, const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) - { - CRITICAL_REGION_LOCAL(m_lock); - return invoke(uri, "POST", body, ppresponse_info, additional_params); - } - private: - //--------------------------------------------------------------------------- - inline bool handle_reciev() - { - CRITICAL_REGION_LOCAL(m_lock); - bool keep_handling = true; - bool need_more_data = true; - std::string recv_buffer; - while(keep_handling) - { - if(need_more_data) - { - if(!m_net_client.recv(recv_buffer)) - { - LOG_PRINT("Unexpected reciec fail", LOG_LEVEL_3); - m_state = reciev_machine_state_error; - } - if(!recv_buffer.size()) - { - //connection is going to be closed - if(reciev_machine_state_body_connection_close != m_state) - { - m_state = reciev_machine_state_error; - } - } - need_more_data = false; - } - switch(m_state) - { - case reciev_machine_state_header: - keep_handling = handle_header(recv_buffer, need_more_data); - break; - case reciev_machine_state_body_content_len: - keep_handling = handle_body_content_len(recv_buffer, need_more_data); - break; - case reciev_machine_state_body_connection_close: - keep_handling = handle_body_connection_close(recv_buffer, need_more_data); - break; - case reciev_machine_state_body_chunked: - keep_handling = handle_body_body_chunked(recv_buffer, need_more_data); - break; - case reciev_machine_state_done: - keep_handling = false; - break; - case reciev_machine_state_error: - keep_handling = false; - break; - } + namespace http + { + template + class http_simple_client_t : public i_target_handler + { + public: - } - m_header_cache.clear(); - if(m_state != reciev_machine_state_error) - { - if(m_response_info.m_header_info.m_connection.size() && !string_tools::compare_no_case("close", m_response_info.m_header_info.m_connection)) - disconnect(); - return true; - } - else - { - LOG_PRINT_L3("Returning false because of wrong state machine. state: " << m_state); - return false; - } - } - //--------------------------------------------------------------------------- - inline - bool handle_header(std::string& recv_buff, bool& need_more_data) - { - - CRITICAL_REGION_LOCAL(m_lock); - if(!recv_buff.size()) + private: + enum reciev_machine_state { - LOG_ERROR("Connection closed at handle_header"); - m_state = reciev_machine_state_error; - return false; + reciev_machine_state_header, + reciev_machine_state_body_content_len, + reciev_machine_state_body_connection_close, + reciev_machine_state_body_chunked, + reciev_machine_state_done, + reciev_machine_state_error + }; + + + + enum chunked_state { + http_chunked_state_chunk_head, + http_chunked_state_chunk_body, + http_chunked_state_done, + http_chunked_state_undefined + }; + + + blocked_mode_client_t m_net_client; + std::string m_host_buff; + std::string m_port; + //unsigned int m_timeout; + unsigned int m_connection_timeout; + unsigned int m_recv_timeout; + std::string m_header_cache; + http_response_info m_response_info; + //std::string* m_ptarget_buffer; + boost::shared_ptr m_pcontent_encoding_handler; + reciev_machine_state m_state; + chunked_state m_chunked_state; + std::string m_chunked_cache; + critical_section m_lock; + + protected: + uint64_t m_len_in_summary; + uint64_t m_len_in_remain; + + public: + void set_host_name(const std::string& name) + { + CRITICAL_REGION_LOCAL(m_lock); + m_host_buff = name; } - m_header_cache += recv_buff; - recv_buff.clear(); - std::string::size_type pos = m_header_cache.find("\r\n\r\n"); - if(pos != std::string::npos) - { - recv_buff.assign(m_header_cache.begin()+pos+4, m_header_cache.end()); - m_header_cache.erase(m_header_cache.begin()+pos+4, m_header_cache.end()); - - analize_cached_header_and_invoke_state(); - m_header_cache.clear(); - if(!recv_buff.size() && (m_state != reciev_machine_state_error && m_state != reciev_machine_state_done)) - need_more_data = true; - - return true; - }else - need_more_data = true; - return true; - } - //--------------------------------------------------------------------------- - inline - bool handle_body_content_len(std::string& recv_buff, bool& need_more_data) - { - CRITICAL_REGION_LOCAL(m_lock); - if(!recv_buff.size()) - { - LOG_PRINT("Warning: Content-Len mode, but connection unexpectedly closed", LOG_LEVEL_3); - m_state = reciev_machine_state_done; - return true; - } - CHECK_AND_ASSERT_MES(m_len_in_remain >= recv_buff.size(), false, "m_len_in_remain >= recv_buff.size()"); - m_len_in_remain -= recv_buff.size(); - m_pcontent_encoding_handler->update_in(recv_buff); - - if(m_len_in_remain == 0) - m_state = reciev_machine_state_done; - else - need_more_data = true; - - return true; - } - //--------------------------------------------------------------------------- - inline - bool handle_body_connection_close(std::string& recv_buff, bool& need_more_data) - { - CRITICAL_REGION_LOCAL(m_lock); - if(!recv_buff.size()) - { - m_state = reciev_machine_state_done; - return true; - } - need_more_data = true; - m_pcontent_encoding_handler->update_in(recv_buff); + boost::asio::ip::tcp::socket& get_socket() + { + return m_net_client.get_socket(); + } - return true; - } - //--------------------------------------------------------------------------- - inline bool is_hex_symbol(char ch) - { + bool connect(const std::string& host, int port, unsigned int timeout) + { + return connect(host, std::to_string(port), timeout); + } - if( (ch >= '0' && ch <='9')||(ch >= 'A' && ch <='F')||(ch >= 'a' && ch <='f')) - return true; - else - return false; - } - //--------------------------------------------------------------------------- - inline - bool get_len_from_chunk_head(const std::string &chunk_head, size_t& result_size) - { - std::stringstream str_stream; - str_stream << std::hex; - if(!(str_stream << chunk_head && str_stream >> result_size)) - return false; + bool set_timeouts(unsigned int connection_timeout, unsigned int recv_timeout) + { + m_connection_timeout = connection_timeout; + m_recv_timeout = recv_timeout; + return true; + } - return true; - } - //--------------------------------------------------------------------------- - inline - bool get_chunk_head(std::string& buff, size_t& chunk_size, bool& is_matched) - { - is_matched = false; - size_t offset = 0; - for(std::string::iterator it = buff.begin(); it!= buff.end(); it++, offset++) - { - if(!is_hex_symbol(*it)) - { - if(*it == '\r' || *it == ' ' ) - { - offset--; - continue; - } - else if(*it == '\n') - { - std::string chunk_head = buff.substr(0, offset); - if(!get_len_from_chunk_head(chunk_head, chunk_size)) - return false; + bool connect(const std::string& host, std::string port) + { + CRITICAL_REGION_LOCAL(m_lock); + m_host_buff = host; + m_port = port; - if(0 == chunk_size) - { - //Here is a small confusion - //In breif - if the chunk is the last one we need to get terminating sequence - //along with the cipher, generally in the "ddd\r\n\r\n" form + return m_net_client.connect(host, port, m_connection_timeout, m_recv_timeout); + } - for(it++;it != buff.end(); it++) - { - if('\r' == *it) - continue; - else if('\n' == *it) - break; - else - { - LOG_ERROR("http_stream_filter: Wrong last chunk terminator"); - return false; - } - } + bool connect(const std::string& host, const std::string& port, unsigned int timeout) + { + m_connection_timeout = m_recv_timeout = timeout; + return connect(host, port); + } + //--------------------------------------------------------------------------- + bool disconnect() + { + CRITICAL_REGION_LOCAL(m_lock); + return m_net_client.disconnect(); + } + //--------------------------------------------------------------------------- + bool is_connected() + { + CRITICAL_REGION_LOCAL(m_lock); + return m_net_client.is_connected(); + } + //--------------------------------------------------------------------------- + virtual bool handle_target_data(std::string& piece_of_transfer) + { + CRITICAL_REGION_LOCAL(m_lock); + m_response_info.m_body += piece_of_transfer; + piece_of_transfer.clear(); + return true; + } + //--------------------------------------------------------------------------- + inline + bool invoke_get(const std::string& uri, const std::string& body = std::string(), const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) + { + CRITICAL_REGION_LOCAL(m_lock); + return invoke(uri, "GET", body, ppresponse_info, additional_params); + } - if(it == buff.end()) - return true; - } + //--------------------------------------------------------------------------- + inline bool invoke(const std::string& uri, const std::string& method, const std::string& body, const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) + { + CRITICAL_REGION_LOCAL(m_lock); + if (!is_connected()) + { + LOG_PRINT("Reconnecting...", LOG_LEVEL_3); + if (!connect(m_host_buff, m_port)) + { + LOG_PRINT("Failed to connect to " << m_host_buff << ":" << m_port, LOG_LEVEL_3); + return false; + } + } + m_response_info.clear(); + std::string req_buff = method + " "; + req_buff += uri + " HTTP/1.1\r\n" + + "Host: " + m_host_buff + "\r\n" + "Content-Length: " + boost::lexical_cast(body.size()) + "\r\n"; - buff.erase(buff.begin(), ++it); - is_matched = true; - return true; - } - else - return false; - } - } + //handle "additional_params" + for (fields_list::const_iterator it = additional_params.begin(); it != additional_params.end(); it++) + req_buff += it->first + ": " + it->second + "\r\n"; + req_buff += "\r\n"; + //-- - return true; - } - //--------------------------------------------------------------------------- - inline - bool handle_body_body_chunked(std::string& recv_buff, bool& need_more_data) - { - CRITICAL_REGION_LOCAL(m_lock); - if(!recv_buff.size()) - { - LOG_PRINT("Warning: CHUNKED mode, but connection unexpectedly closed", LOG_LEVEL_3); - m_state = reciev_machine_state_done; - return true; - } - m_chunked_cache += recv_buff; - recv_buff.clear(); - bool is_matched = false; + bool res = m_net_client.send(req_buff); + CHECK_AND_ASSERT_MES(res, false, "HTTP_CLIENT: Failed to SEND"); + if (body.size()) + res = m_net_client.send(body); + CHECK_AND_ASSERT_MES(res, false, "HTTP_CLIENT: Failed to SEND"); - while(true) - { - if(!m_chunked_cache.size()) - { - need_more_data = true; - break; - } + if (ppresponse_info) + *ppresponse_info = &m_response_info; - switch(m_chunked_state) - { - case http_chunked_state_chunk_head: - if(m_chunked_cache[0] == '\n' || m_chunked_cache[0] == '\r') - { - //optimize a bit - if(m_chunked_cache[0] == '\r' && m_chunked_cache.size()>1 && m_chunked_cache[1] == '\n') - m_chunked_cache.erase(0, 2); - else - m_chunked_cache.erase(0, 1); - break; - } - if(!get_chunk_head(m_chunked_cache, m_len_in_remain, is_matched)) - { - LOG_ERROR("http_stream_filter::handle_chunked(*) Failed to get length from chunked head:" << m_chunked_cache); - m_state = reciev_machine_state_error; - return false; - } + m_state = reciev_machine_state_header; + return handle_reciev(); + } + //--------------------------------------------------------------------------- + inline bool invoke_post(const std::string& uri, const std::string& body, const http_response_info** ppresponse_info = NULL, const fields_list& additional_params = fields_list()) + { + CRITICAL_REGION_LOCAL(m_lock); + return invoke(uri, "POST", body, ppresponse_info, additional_params); + } + private: + //--------------------------------------------------------------------------- + inline bool handle_reciev() + { + CRITICAL_REGION_LOCAL(m_lock); + bool keep_handling = true; + bool need_more_data = true; + std::string recv_buffer; + while (keep_handling) + { + if (need_more_data) + { + if (!m_net_client.recv(recv_buffer)) + { + LOG_PRINT("Unexpected reciec fail", LOG_LEVEL_3); + m_state = reciev_machine_state_error; + } + if (!recv_buffer.size()) + { + //connection is going to be closed + if (reciev_machine_state_body_connection_close != m_state) + { + m_state = reciev_machine_state_error; + } + } + need_more_data = false; + } + switch (m_state) + { + case reciev_machine_state_header: + keep_handling = handle_header(recv_buffer, need_more_data); + break; + case reciev_machine_state_body_content_len: + keep_handling = handle_body_content_len(recv_buffer, need_more_data); + break; + case reciev_machine_state_body_connection_close: + keep_handling = handle_body_connection_close(recv_buffer, need_more_data); + break; + case reciev_machine_state_body_chunked: + keep_handling = handle_body_body_chunked(recv_buffer, need_more_data); + break; + case reciev_machine_state_done: + keep_handling = false; + break; + case reciev_machine_state_error: + keep_handling = false; + break; + } - if(!is_matched) - { - need_more_data = true; - return true; - }else - { - m_chunked_state = http_chunked_state_chunk_body; - if(m_len_in_remain == 0) - {//last chunk, let stop the stream and fix the chunk queue. - m_state = reciev_machine_state_done; - return true; - } - m_chunked_state = http_chunked_state_chunk_body; - break; - } - break; - case http_chunked_state_chunk_body: - { - std::string chunk_body; - if(m_len_in_remain >= m_chunked_cache.size()) - { - m_len_in_remain -= m_chunked_cache.size(); - chunk_body.swap(m_chunked_cache); - }else - { - chunk_body.assign(m_chunked_cache, 0, m_len_in_remain); - m_chunked_cache.erase(0, m_len_in_remain); - m_len_in_remain = 0; - } + } + m_header_cache.clear(); + if (m_state != reciev_machine_state_error) + { + if (m_response_info.m_header_info.m_connection.size() && !string_tools::compare_no_case("close", m_response_info.m_header_info.m_connection)) + disconnect(); - m_pcontent_encoding_handler->update_in(chunk_body); + return true; + } + else + { + LOG_PRINT_L3("Returning false because of wrong state machine. state: " << m_state); + return false; + } + } + //--------------------------------------------------------------------------- + inline + bool handle_header(std::string& recv_buff, bool& need_more_data) + { - if(!m_len_in_remain) - m_chunked_state = http_chunked_state_chunk_head; - } - break; - case http_chunked_state_done: - m_state = reciev_machine_state_done; - return true; - case http_chunked_state_undefined: - default: - LOG_ERROR("http_stream_filter::handle_chunked(): Wrong state" << m_chunked_state); - return false; - } - } + CRITICAL_REGION_LOCAL(m_lock); + if (!recv_buff.size()) + { + LOG_ERROR("Connection closed at handle_header"); + m_state = reciev_machine_state_error; + return false; + } - return true; - } - //--------------------------------------------------------------------------- - inline - bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process) - { - LOG_FRAME("http_stream_filter::parse_cached_header(*)", LOG_LEVEL_4); - - STATIC_REGEXP_EXPR_1(rexp_mach_field, - "\n?((Connection)|(Referer)|(Content-Length)|(Content-Type)|(Transfer-Encoding)|(Content-Encoding)|(Host)|(Cookie)" - // 12 3 4 5 6 7 8 9 - "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]", - //10 1112 13 - boost::regex::icase | boost::regex::normal); + m_header_cache += recv_buff; + recv_buff.clear(); + std::string::size_type pos = m_header_cache.find("\r\n\r\n"); + if (pos != std::string::npos) + { + recv_buff.assign(m_header_cache.begin() + pos + 4, m_header_cache.end()); + m_header_cache.erase(m_header_cache.begin() + pos + 4, m_header_cache.end()); - boost::smatch result; - std::string::const_iterator it_current_bound = m_cache_to_process.begin(); - std::string::const_iterator it_end_bound = m_cache_to_process.end(); + analize_cached_header_and_invoke_state(); + m_header_cache.clear(); + if (!recv_buff.size() && (m_state != reciev_machine_state_error && m_state != reciev_machine_state_done)) + need_more_data = true; + + return true; + } + else + need_more_data = true; + return true; + } + //--------------------------------------------------------------------------- + inline + bool handle_body_content_len(std::string& recv_buff, bool& need_more_data) + { + CRITICAL_REGION_LOCAL(m_lock); + if (!recv_buff.size()) + { + LOG_PRINT("Warning: Content-Len mode, but connection unexpectedly closed", LOG_LEVEL_3); + m_state = reciev_machine_state_done; + return true; + } + CHECK_AND_ASSERT_MES(m_len_in_remain >= recv_buff.size(), false, "m_len_in_remain >= recv_buff.size()"); + m_len_in_remain -= recv_buff.size(); + bool r = m_pcontent_encoding_handler->update_in(recv_buff); + //CHECK_AND_ASSERT_MES(m_len_in_remain >= recv_buff.size(), false, "m_pcontent_encoding_handler->update_in returned false"); + if (!r) + { + m_state = reciev_machine_state_error; + disconnect(); + return false; + } + + if (m_len_in_remain == 0) + m_state = reciev_machine_state_done; + else + need_more_data = true; + + return true; + } + //--------------------------------------------------------------------------- + inline + bool handle_body_connection_close(std::string& recv_buff, bool& need_more_data) + { + CRITICAL_REGION_LOCAL(m_lock); + if (!recv_buff.size()) + { + m_state = reciev_machine_state_done; + return true; + } + need_more_data = true; + m_pcontent_encoding_handler->update_in(recv_buff); + + + return true; + } + //--------------------------------------------------------------------------- + inline bool is_hex_symbol(char ch) + { + + if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) + return true; + else + return false; + } + //--------------------------------------------------------------------------- + inline + bool get_len_from_chunk_head(const std::string &chunk_head, uint64_t& result_size) + { + std::stringstream str_stream; + str_stream << std::hex; + if (!(str_stream << chunk_head && str_stream >> result_size)) + return false; + + return true; + } + //--------------------------------------------------------------------------- + inline + bool get_chunk_head(std::string& buff, uint64_t& chunk_size, bool& is_matched) + { + is_matched = false; + size_t offset = 0; + for (std::string::iterator it = buff.begin(); it != buff.end(); it++, offset++) + { + if (!is_hex_symbol(*it)) + { + if (*it == '\r' || *it == ' ') + { + offset--; + continue; + } + else if (*it == '\n') + { + std::string chunk_head = buff.substr(0, offset); + if (!get_len_from_chunk_head(chunk_head, chunk_size)) + return false; + + if (0 == chunk_size) + { + //Here is a small confusion + //In breif - if the chunk is the last one we need to get terminating sequence + //along with the cipher, generally in the "ddd\r\n\r\n" form + + for (it++; it != buff.end(); it++) + { + if ('\r' == *it) + continue; + else if ('\n' == *it) + break; + else + { + LOG_ERROR("http_stream_filter: Wrong last chunk terminator"); + return false; + } + } + + if (it == buff.end()) + return true; + } + + buff.erase(buff.begin(), ++it); + + is_matched = true; + return true; + } + else + return false; + } + } + + return true; + } + //--------------------------------------------------------------------------- + inline + bool handle_body_body_chunked(std::string& recv_buff, bool& need_more_data) + { + CRITICAL_REGION_LOCAL(m_lock); + if (!recv_buff.size()) + { + LOG_PRINT("Warning: CHUNKED mode, but connection unexpectedly closed", LOG_LEVEL_3); + m_state = reciev_machine_state_done; + return true; + } + m_chunked_cache += recv_buff; + recv_buff.clear(); + bool is_matched = false; + + while (true) + { + if (!m_chunked_cache.size()) + { + need_more_data = true; + break; + } + + switch (m_chunked_state) + { + case http_chunked_state_chunk_head: + if (m_chunked_cache[0] == '\n' || m_chunked_cache[0] == '\r') + { + //optimize a bit + if (m_chunked_cache[0] == '\r' && m_chunked_cache.size()>1 && m_chunked_cache[1] == '\n') + m_chunked_cache.erase(0, 2); + else + m_chunked_cache.erase(0, 1); + break; + } + if (!get_chunk_head(m_chunked_cache, m_len_in_remain, is_matched)) + { + LOG_ERROR("http_stream_filter::handle_chunked(*) Failed to get length from chunked head:" << m_chunked_cache); + m_state = reciev_machine_state_error; + return false; + } + + if (!is_matched) + { + need_more_data = true; + return true; + } + else + { + m_chunked_state = http_chunked_state_chunk_body; + if (m_len_in_remain == 0) + {//last chunk, let stop the stream and fix the chunk queue. + m_state = reciev_machine_state_done; + return true; + } + m_chunked_state = http_chunked_state_chunk_body; + break; + } + break; + case http_chunked_state_chunk_body: + { + std::string chunk_body; + if (m_len_in_remain >= m_chunked_cache.size()) + { + m_len_in_remain -= m_chunked_cache.size(); + chunk_body.swap(m_chunked_cache); + } + else + { + chunk_body.assign(m_chunked_cache, 0, m_len_in_remain); + m_chunked_cache.erase(0, m_len_in_remain); + m_len_in_remain = 0; + } + + m_pcontent_encoding_handler->update_in(chunk_body); + + if (!m_len_in_remain) + m_chunked_state = http_chunked_state_chunk_head; + } + break; + case http_chunked_state_done: + m_state = reciev_machine_state_done; + return true; + case http_chunked_state_undefined: + default: + LOG_ERROR("http_stream_filter::handle_chunked(): Wrong state" << m_chunked_state); + return false; + } + } + + return true; + } + //--------------------------------------------------------------------------- + inline + bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process) + { + LOG_FRAME("http_stream_filter::parse_cached_header(*)", LOG_LEVEL_4); + + STATIC_REGEXP_EXPR_1(rexp_mach_field, + "\n?((Connection)|(Referer)|(Content-Length)|(Content-Type)|(Transfer-Encoding)|(Content-Encoding)|(Host)|(Cookie)" + // 12 3 4 5 6 7 8 9 + "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]", + //10 1112 13 + boost::regex::icase | boost::regex::normal); + + boost::smatch result; + std::string::const_iterator it_current_bound = m_cache_to_process.begin(); + std::string::const_iterator it_end_bound = m_cache_to_process.end(); - //lookup all fields and fill well-known fields - while( boost::regex_search( it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched) - { - const size_t field_val = 12; - //const size_t field_etc_name = 10; + //lookup all fields and fill well-known fields + while (boost::regex_search(it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched) + { + const size_t field_val = 12; + //const size_t field_etc_name = 10; - int i = 2; //start position = 2 - if(result[i++].matched)//"Connection" - body_info.m_connection = result[field_val]; - else if(result[i++].matched)//"Referrer" - body_info.m_referer = result[field_val]; - else if(result[i++].matched)//"Content-Length" - body_info.m_content_length = result[field_val]; - else if(result[i++].matched)//"Content-Type" - body_info.m_content_type = result[field_val]; - else if(result[i++].matched)//"Transfer-Encoding" - body_info.m_transfer_encoding = result[field_val]; - else if(result[i++].matched)//"Content-Encoding" - body_info.m_content_encoding = result[field_val]; - else if(result[i++].matched)//"Host" - { body_info.m_host = result[field_val]; - string_tools::trim(body_info.m_host); - } - else if(result[i++].matched)//"Cookie" - body_info.m_cookie = result[field_val]; - else if(result[i++].matched)//e.t.c (HAVE TO BE MATCHED!) - {;} - else - {CHECK_AND_ASSERT_MES(false, false, "http_stream_filter::parse_cached_header() not matched last entry in:"<(result[1]); - m_response_info.m_http_ver_lo = boost::lexical_cast(result[2]); - m_response_info.m_response_code = boost::lexical_cast(result[3]); - - m_header_cache.erase(to_nonsonst_iterator(m_header_cache, result[0].first), to_nonsonst_iterator(m_header_cache, result[0].second)); - return true; - }else - { - LOG_ERROR("http_stream_filter::handle_invoke_reply_line(): Failed to match first response line:" << m_header_cache); - return false; - } + //First line response, look like this: "HTTP/1.1 200 OK" + STATIC_REGEXP_EXPR_1(rexp_match_first_response_line, "^HTTP/(\\d+).(\\d+) ((\\d)\\d{2})( [^\n]*)?\r?\n", boost::regex::icase | boost::regex::normal); + // 1 2 34 5 + //size_t match_len = 0; + boost::smatch result; + if (boost::regex_search(m_header_cache, result, rexp_match_first_response_line, boost::match_default) && result[0].matched) + { + CHECK_AND_ASSERT_MES(result[1].matched&&result[2].matched, false, "http_stream_filter::handle_invoke_reply_line() assert failed..."); + m_response_info.m_http_ver_hi = boost::lexical_cast(result[1]); + m_response_info.m_http_ver_lo = boost::lexical_cast(result[2]); + m_response_info.m_response_code = boost::lexical_cast(result[3]); - } - inline - bool set_reply_content_encoder() - { - STATIC_REGEXP_EXPR_1(rexp_match_gzip, "^.*?((gzip)|(deflate))", boost::regex::icase | boost::regex::normal); - boost::smatch result; // 12 3 - if(boost::regex_search( m_response_info.m_header_info.m_content_encoding, result, rexp_match_gzip, boost::match_default) && result[0].matched) - { + m_header_cache.erase(to_nonsonst_iterator(m_header_cache, result[0].first), to_nonsonst_iterator(m_header_cache, result[0].second)); + return true; + } + else + { + LOG_ERROR("http_stream_filter::handle_invoke_reply_line(): Failed to match first response line:" << m_header_cache); + return false; + } + + } + inline + bool set_reply_content_encoder() + { + STATIC_REGEXP_EXPR_1(rexp_match_gzip, "^.*?((gzip)|(deflate))", boost::regex::icase | boost::regex::normal); + boost::smatch result; // 12 3 + if (boost::regex_search(m_response_info.m_header_info.m_content_encoding, result, rexp_match_gzip, boost::match_default) && result[0].matched) + { #ifdef HTTP_ENABLE_GZIP - m_pcontent_encoding_handler.reset(new content_encoding_gzip(this, result[3].matched)); + m_pcontent_encoding_handler.reset(new content_encoding_gzip(this, result[3].matched)); #else - m_pcontent_encoding_handler.reset(new do_nothing_sub_handler(this)); - LOG_ERROR("GZIP encoding not supported in this build, please add zlib to your project and define HTTP_ENABLE_GZIP"); - return false; + m_pcontent_encoding_handler.reset(new do_nothing_sub_handler(this)); + LOG_ERROR("GZIP encoding not supported in this build, please add zlib to your project and define HTTP_ENABLE_GZIP"); + return false; #endif - } - else - { - m_pcontent_encoding_handler.reset(new do_nothing_sub_handler(this)); - } + } + else + { + m_pcontent_encoding_handler.reset(new do_nothing_sub_handler(this)); + } - return true; - } - inline - bool analize_cached_header_and_invoke_state() - { - m_response_info.clear(); - analize_first_response_line(); - std::string fake_str; //gcc error workaround + return true; + } + inline + bool analize_cached_header_and_invoke_state() + { + m_response_info.clear(); + analize_first_response_line(); + std::string fake_str; //gcc error workaround - bool res = parse_header(m_response_info.m_header_info, m_header_cache); - CHECK_AND_ASSERT_MES(res, false, "http_stream_filter::analize_cached_reply_header_and_invoke_state(): failed to anilize reply header: " << m_header_cache); + bool res = parse_header(m_response_info.m_header_info, m_header_cache); + CHECK_AND_ASSERT_MES(res, false, "http_stream_filter::analize_cached_reply_header_and_invoke_state(): failed to anilize reply header: " << m_header_cache); - set_reply_content_encoder(); + set_reply_content_encoder(); - m_len_in_summary = 0; - bool content_len_valid = false; - if(m_response_info.m_header_info.m_content_length.size()) - content_len_valid = string_tools::get_xtype_from_string(m_len_in_summary, m_response_info.m_header_info.m_content_length); + m_len_in_summary = 0; + bool content_len_valid = false; + if (m_response_info.m_header_info.m_content_length.size()) + content_len_valid = string_tools::get_xtype_from_string(m_len_in_summary, m_response_info.m_header_info.m_content_length); - if(!m_len_in_summary && ((m_response_info.m_response_code>=100&&m_response_info.m_response_code<200) - || 204 == m_response_info.m_response_code - || 304 == m_response_info.m_response_code) ) - {//There will be no response body, server will display the local page with error - m_state = reciev_machine_state_done; - return true; - }else if(m_response_info.m_header_info.m_transfer_encoding.size()) - { - string_tools::trim(m_response_info.m_header_info.m_transfer_encoding); - if(string_tools::compare_no_case(m_response_info.m_header_info.m_transfer_encoding, "chunked")) - { - LOG_ERROR("Wrong Transfer-Encoding:" << m_response_info.m_header_info.m_transfer_encoding); - m_state = reciev_machine_state_error; - return false; - } - m_state = reciev_machine_state_body_chunked; - m_chunked_state = http_chunked_state_chunk_head; - return true; - } - else if(!m_response_info.m_header_info.m_content_length.empty()) - { - //In the response header the length was specified - if(!content_len_valid) - { - LOG_ERROR("http_stream_filter::analize_cached_reply_header_and_invoke_state(): Failed to get_len_from_content_lenght();, m_query_info.m_content_length="< + bool invoke_request(const std::string& url, t_transport& tr, unsigned int timeout, const http_response_info** ppresponse_info, const std::string& method = "GET", const std::string& body = std::string(), const fields_list& additional_params = fields_list()) { - CHECK_AND_ASSERT_MES(res, false, "failed to parse url: " << url); + http::url_content u_c; + bool res = parse_url(url, u_c); - if(!u_c.port) - u_c.port = 80;//default for http + if (!tr.is_connected() && !u_c.host.empty()) + { + CHECK_AND_ASSERT_MES(res, false, "failed to parse url: " << url); - res = tr.connect(u_c.host, static_cast(u_c.port), timeout); - CHECK_AND_ASSERT_MES(res, false, "failed to connect " << u_c.host << ":" << u_c.port); + if (!u_c.port) + u_c.port = 80;//default for http + + if (!tr.connect(u_c.host, static_cast(u_c.port), timeout)) + { + LOG_PRINT_L2("invoke_request: cannot connect to " << u_c.host << ":" << u_c.port); + return false; + } + } + + return tr.invoke(u_c.uri, method, body, ppresponse_info, additional_params); } - return tr.invoke(u_c.uri, method, body, ppresponse_info, additional_params); - } + struct idle_handler_base + { + virtual bool do_call(const std::string& piece_of_data, uint64_t total_bytes, uint64_t received_bytes) = 0; + virtual ~idle_handler_base() {} + }; - } -} -} + template + struct idle_handler : public idle_handler_base + { + callback_t m_cb; + + idle_handler(callback_t cb) : m_cb(cb) {} + virtual bool do_call(const std::string& piece_of_data, uint64_t total_bytes, uint64_t received_bytes) + { + return m_cb(piece_of_data, total_bytes, received_bytes); + } + }; + + class interruptible_http_client : public http_simple_client + { + std::shared_ptr m_pcb; + bool m_permanent_error = false; + + virtual bool handle_target_data(std::string& piece_of_transfer) + { + bool r = m_pcb->do_call(piece_of_transfer, m_len_in_summary, m_len_in_summary - m_len_in_remain); + piece_of_transfer.clear(); + return r; + } + + public: + template + bool invoke_cb(callback_t cb, const std::string& url, uint64_t timeout, const std::string& method = "GET", const std::string& body = std::string(), const fields_list& additional_params = fields_list()) + { + m_pcb.reset(new idle_handler(cb)); + const http_response_info* p_hri = nullptr; + bool r = invoke_request(url, *this, timeout, &p_hri, method, body, additional_params); + if (p_hri && !(p_hri->m_response_code >= 200 && p_hri->m_response_code < 300)) + { + LOG_PRINT_L0("HTTP request to " << url << " failed with code: " << p_hri->m_response_code); + m_permanent_error = true; + return false; + } + return r; + } + + template + bool download(callback_t cb, const std::string& path_for_file, const std::string& url, uint64_t timeout, const std::string& method = "GET", const std::string& body = std::string(), const fields_list& additional_params = fields_list()) + { + std::ofstream fs; + fs.open(path_for_file, std::ios::binary | std::ios::out | std::ios::trunc); + if (!fs.is_open()) + { + LOG_ERROR("Fsiled to open " << path_for_file); + return false; + } + auto local_cb = [&](const std::string& piece_of_data, uint64_t total_bytes, uint64_t received_bytes) + { + fs.write(piece_of_data.data(), piece_of_data.size()); + return cb(total_bytes, received_bytes); + }; + bool r = this->invoke_cb(local_cb, url, timeout, method, body, additional_params); + fs.close(); + return r; + } + + // + template + bool download_and_unzip(callback_t cb, const std::string& path_for_file, const std::string& url, uint64_t timeout, const std::string& method = "GET", const std::string& body = std::string(), uint64_t fails_count = 1000, const fields_list& additional_params = fields_list()) + { + std::ofstream fs; + fs.open(path_for_file, std::ios::binary | std::ios::out | std::ios::trunc); + if (!fs.is_open()) + { + LOG_ERROR("Fsiled to open " << path_for_file); + return false; + } + std::string buff; + gzip_decoder_lambda zip_decoder; + uint64_t state_total_bytes = 0; + uint64_t state_received_bytes_base = 0; + uint64_t state_received_bytes_current = 0; + bool stopped = false; + auto local_cb = [&](const std::string& piece_of_data, uint64_t total_bytes, uint64_t received_bytes) + { + //remember total_bytes only for first attempt, where fetched full lenght of the file + if (!state_total_bytes) + state_total_bytes = total_bytes; + + buff += piece_of_data; + return zip_decoder.update_in(buff, [&](const std::string& unpacked_buff) + { + state_received_bytes_current = received_bytes; + fs.write(unpacked_buff.data(), unpacked_buff.size()); + stopped = !cb(unpacked_buff, state_total_bytes, state_received_bytes_base + received_bytes); + return !stopped; + }); + }; + uint64_t current_err_count = 0; + bool r = false; + m_permanent_error = false; + while (!r && current_err_count < fails_count) + { + LOG_PRINT_L0("Attempt " << current_err_count + 1 << "/" << fails_count << " to get " << url << " (offset:" << state_received_bytes_base << ")"); + fields_list additional_params_local = additional_params; + additional_params_local.push_back(std::make_pair("Range", std::string("bytes=") + std::to_string(state_received_bytes_base) + "-")); + r = this->invoke_cb(local_cb, url, timeout, method, body, additional_params_local); + if (!r) + { + if (stopped || m_permanent_error) + break; + current_err_count++; + state_received_bytes_base += state_received_bytes_current; + state_received_bytes_current = 0; + boost::this_thread::sleep_for(boost::chrono::milliseconds(2000)); + } + } + + if (current_err_count >= fails_count) + { + LOG_PRINT_YELLOW("Downloading from " << url << " FAILED as it's reached maximum (" << fails_count << ") number of attempts. Downloaded " << state_received_bytes_base << " bytes.", LOG_LEVEL_0); + } + else if (m_permanent_error) + { + LOG_PRINT_YELLOW("Downloading from " << url << " FAILED due to permanent HTTP error. Downloaded " << state_received_bytes_base << " bytes.", LOG_LEVEL_0); + } + + fs.close(); + return r; + } + }; + + + } // namespace http + + } // namespace net_utils +} // namespace epee \ No newline at end of file