
# Copyright (C) 2013-2016 DNAnexus, Inc.
#
# This file is part of dx-toolkit (DNAnexus platform client libraries).
#
#   Licensed under the Apache License, Version 2.0 (the "License"); you may not
#   use this file except in compliance with the License. You may obtain a copy
#   of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#   License for the specific language governing permissions and limitations
#   under the License.

'''
When this package is imported, configuration values will be loaded from
the following sources in order of decreasing priority:

1. Environment variables
2. Values stored in ``~/.dnanexus_config/environment``
3. Hardcoded defaults

The bindings are configured by the following environment variables:

.. envvar:: DX_SECURITY_CONTEXT

   A JSON hash containing your auth token, typically of the form
   ``{"auth_token_type": "Bearer", "auth_token": "YOUR_TOKEN"}``.
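
   For example, a minimal sketch (the token value is a placeholder) of
   setting this variable from Python before importing dxpy::

      import json, os
      os.environ["DX_SECURITY_CONTEXT"] = json.dumps(
          {"auth_token_type": "Bearer", "auth_token": "YOUR_TOKEN"})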

.. envvar:: DX_APISERVER_PROTOCOL

   Either ``http`` or ``https`` (usually ``https``).

.. envvar:: DX_APISERVER_HOST

   Hostname of the DNAnexus API server.

.. envvar:: DX_APISERVER_PORT

   Port of the DNAnexus API server.

.. envvar:: DX_JOB_ID

   Should only be present if run in an Execution Environment; indicates
   the ID of the currently running job.

.. envvar:: DX_WORKSPACE_ID

   Should only be present if run in an Execution Environment; indicates
   the running job's temporary workspace ID.

.. envvar:: DX_PROJECT_CONTEXT_ID

   Indicates either the project context of a running job, or the default
   project to use for a user accessing the platform from the outside.

The following fields can be used to read the current configuration
values:

.. py:data:: APISERVER_PROTOCOL

   Protocol being used to access the DNAnexus API server. Either
   ``http`` or ``https`` (usually ``https``).

.. py:data:: APISERVER_HOST

   Hostname of the DNAnexus API server.

.. py:data:: APISERVER_PORT

   Port of the DNAnexus API server.

.. py:data:: JOB_ID

   Indicates the ID of the currently running job, or None if we are not
   in an Execution Environment.

.. py:data:: WORKSPACE_ID

   Indicates the temporary workspace ID of the currently running job, or
   the current project if we are not in an Execution Environment.

.. py:data:: PROJECT_CONTEXT_ID

   Indicates either the project context of a running job, if there is
   one, or the default project that is being used, for users accessing
   the platform from the outside.

.. py:data:: USER_AGENT

   The user agent string that dxpy will send to the server with each request.

The :func:`dxpy.DXHTTPRequest` function uses the ``DX_SECURITY_CONTEXT``
and ``DX_APISERVER_*`` variables to select an API server and provide
appropriate authentication headers to it. (Note: all methods in the
:mod:`dxpy.api` module, and by extension any of the bindings methods
that make API calls, use this function.)

All object handler methods that require a project or data container ID
use by default the ``DX_WORKSPACE_ID`` (if running inside an Execution
Environment) or ``DX_PROJECT_CONTEXT_ID`` (otherwise).

The following functions can be used to override any of the settings
obtained from the environment for the duration of the session:

* :func:`dxpy.set_security_context`: to specify an authentication token
* :func:`dxpy.set_api_server_info`: to specify the API server (host, port, or protocol)
* :func:`dxpy.set_workspace_id`: to specify the default data container
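
For example, a minimal sketch that overrides all three for the current
session (the token and project ID below are placeholders)::

    import dxpy

    dxpy.set_security_context({"auth_token_type": "Bearer",
                               "auth_token": "YOUR_TOKEN"})
    dxpy.set_api_server_info(host="api.dnanexus.com", port="443",
                             protocol="https")
    dxpy.set_workspace_id("project-xxxx")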

To pass API server requests through an HTTP(S) proxy, set the following
environment variables:

.. envvar:: HTTP_PROXY

   HTTP proxy, in the form 'protocol://hostname:port' (e.g. 'http://10.10.1.10:3128')

.. envvar:: HTTPS_PROXY

   HTTPS proxy, in the form 'protocol://hostname:port'
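
   For example (the proxy address is a placeholder), from Python before
   importing dxpy::

      import os
      os.environ["HTTPS_PROXY"] = "http://10.10.1.10:1080"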

'''

from __future__ import print_function, unicode_literals, division, absolute_import

import logging

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

import os, sys, json, time, platform, ssl, traceback
import errno
import math
import socket
import threading
import certifi
from collections import namedtuple

from . import exceptions
from .compat import BadStatusLine, StringIO, bytes, Repr
from .utils.printing import BOLD, BLUE, YELLOW, GREEN, RED, WHITE

from random import randint
import urllib3
from threading import Lock
from urllib.parse import urlsplit

sequence_number_mutex = threading.Lock()
counter = 0

def _get_sequence_number():
    global counter
    with sequence_number_mutex:
        retval = counter
        counter += 1
        return retval


def configure_urllib3():
    # Disable verbose urllib3 warnings and log messages
    urllib3.disable_warnings(category=urllib3.exceptions.InsecurePlatformWarning)
    logging.getLogger('urllib3.connectionpool').setLevel(logging.ERROR)


configure_urllib3()

from .toolkit_version import version as TOOLKIT_VERSION
__version__ = TOOLKIT_VERSION

API_VERSION = '1.0.0'
AUTH_HELPER, SECURITY_CONTEXT = None, None
JOB_ID, WATCH_PORT, WORKSPACE_ID, PROJECT_CONTEXT_ID = None, None, None, None

DEFAULT_APISERVER_PROTOCOL = 'https'
DEFAULT_APISERVER_HOST = 'api.dnanexus.com'
DEFAULT_APISERVER_PORT = '443'

APISERVER_PROTOCOL = DEFAULT_APISERVER_PROTOCOL
APISERVER_HOST = DEFAULT_APISERVER_HOST
APISERVER_PORT = DEFAULT_APISERVER_PORT

DEFAULT_RETRIES = 6
DEFAULT_TIMEOUT = 905

_DEBUG = 0  # debug verbosity level
_UPGRADE_NOTIFY = True
# Error-injection flag used by the test helpers below (assumed default:
# off; its original definition was elided from this listing).
_INJECT_ERROR = False

INCOMPLETE_READS_NUM_SUBCHUNKS = 8

USER_AGENT = "{name}/{version} ({platform}) Python/{python_version}".format(
    name=__name__,
    version=TOOLKIT_VERSION,
    platform=platform.platform(),
    python_version=platform.python_version())

_default_certs = certifi.where()
_default_headers = {
    "User-Agent": USER_AGENT,
    "Accept-Encoding": "gzip, deflate",
    "Accept": "*/*",
    "Connection": "keep-alive",
}
_default_timeout = urllib3.util.timeout.Timeout(connect=DEFAULT_TIMEOUT, read=DEFAULT_TIMEOUT)
_RequestForAuth = namedtuple('_RequestForAuth', 'method url headers')
_expected_exceptions = (exceptions.network_exceptions, exceptions.DXAPIError, BadStatusLine,
                        exceptions.BadJSONInReply, exceptions.UrllibInternalError)

# Multiple threads can ask for the pool, so we need to protect
# access and make it thread safe.
_pool_mutex = Lock()
_pool_manager = None


def _get_proxy_info(url):
    proxy_info = {}

    url_info = urlsplit(url)
    # If the url contains a username, need to separate the username/password
    # from the url
    if url_info.username:
        # Strip the username/password out of the url
        url = url_info.netloc[url_info.netloc.find('@') + 1:]
        # Now get the username and possibly password
        proxy_info['proxy_url'] = '{0}://{1}'.format(url_info.scheme, url)
        if url_info.password:
            proxy_auth = '{0}:{1}'.format(url_info.username, url_info.password)
        else:
            proxy_auth = url_info.username
        proxy_info['proxy_headers'] = urllib3.make_headers(proxy_basic_auth=proxy_auth)
    else:
        # No username was given, so just take the url as is.
        proxy_info['proxy_url'] = url

    return proxy_info


def _get_env_var_proxy(print_proxy=False):
    proxy_tuple = ('http_proxy', 'HTTP_PROXY', 'https_proxy', 'HTTPS_PROXY')
    proxy = None
    for env_proxy in proxy_tuple:
        if env_proxy in os.environ:
            proxy = os.environ[env_proxy]
            if print_proxy:
                print('Using env variable %s=%s as proxy' % (env_proxy, proxy),
                      file=sys.stderr)
    return proxy


def _get_pool_manager(verify, cert_file, key_file, ssl_context=None):
    global _pool_manager
    default_pool_args = dict(maxsize=32,
                             cert_reqs=ssl.CERT_REQUIRED,
                             headers=_default_headers,
                             timeout=_default_timeout)
    # Don't use the default CA bundle if the user has set the env variable
    # DX_USE_OS_CA_BUNDLE. Enabling that var will make us attempt to load
    # the default CA certs provided by the OS; see DEVEX-875.
    if 'DX_USE_OS_CA_BUNDLE' not in os.environ:
        default_pool_args.update(ca_certs=_default_certs)

    if cert_file is None and verify is None and 'DX_CA_CERT' not in os.environ:
        with _pool_mutex:
            if _pool_manager is None:
                if _get_env_var_proxy():
                    proxy_params = _get_proxy_info(_get_env_var_proxy(print_proxy=True))
                    default_pool_args.update(proxy_params)
                    _pool_manager = urllib3.ProxyManager(**default_pool_args)
                else:
                    _pool_manager = urllib3.PoolManager(**default_pool_args)
            return _pool_manager
    else:
        # This is the uncommon case; normally, we want to cache the pool
        # manager.
        pool_args = dict(default_pool_args,
                         cert_file=cert_file,
                         key_file=key_file,
                         ssl_context=ssl_context,
                         ca_certs=verify or os.environ.get('DX_CA_CERT') or certifi.where())
        if verify is False or os.environ.get('DX_CA_CERT') == 'NOVERIFY':
            pool_args.update(cert_reqs=ssl.CERT_NONE, ca_certs=None)
            urllib3.disable_warnings()
        if _get_env_var_proxy():
            proxy_params = _get_proxy_info(_get_env_var_proxy(print_proxy=True))
            pool_args.update(proxy_params)
            return urllib3.ProxyManager(**pool_args)
        else:
            return urllib3.PoolManager(**pool_args)


def _process_method_url_headers(method, url, headers):
    if callable(url):
        _url, _headers = url()
        _headers.update(headers)
    else:
        _url, _headers = url, headers
    return method, _url, _headers


# When any of the following errors are indicated, we are sure that the
# server never received our request and therefore the request can be
# retried (even if the request is not idempotent).
_RETRYABLE_SOCKET_ERRORS = {
    errno.ENETDOWN,     # The network was down
    errno.ENETUNREACH,  # The subnet containing the remote host was unreachable
    errno.ECONNREFUSED  # A remote host refused to allow the network connection
}

_RETRYABLE_WITH_RESPONSE = (exceptions.ContentLengthError,
                            BadStatusLine,
                            exceptions.BadJSONInReply,
                            ConnectionResetError,
                            urllib3.exceptions.ProtocolError,
                            exceptions.UrllibInternalError)


def _is_retryable_exception(e):
    """Returns True if the exception is always safe to retry.

    This is True if the client was never able to establish a connection
    to the server (for example, name resolution failed or the connection
    could otherwise not be initialized).

    Conservatively, if we can't tell whether a network connection could
    have been established, we return False.
    """
    if isinstance(e, urllib3.exceptions.ProtocolError):
        return True
    if isinstance(e, ConnectionResetError):
        return True
    if isinstance(e, (socket.gaierror, socket.herror)):
        return True
    if isinstance(e, socket.error) and e.errno in _RETRYABLE_SOCKET_ERRORS:
        return True
    if isinstance(e, urllib3.exceptions.NewConnectionError):
        return True
    if isinstance(e, urllib3.exceptions.SSLError):
        return True
    if isinstance(e, ssl.SSLError):
        return True
    return False


def _extract_msg_from_last_exception():
    ''' Extract a useful error message from the last thrown exception '''
    last_exc_type, last_error, last_traceback = sys.exc_info()
    if isinstance(last_error, exceptions.DXAPIError):
        # Using the same code path as below would not
        # produce a useful message when the error contains a
        # 'details' hash (which would have a last line of
        # '}')
        return last_error.error_message()
    else:
        return traceback.format_exception_only(last_exc_type, last_error)[-1].strip()


def _calculate_retry_delay(response, num_attempts):
    '''
    Returns the time in seconds that we should wait.

    :param num_attempts: number of attempts that have been made to the
        resource, including the most recent failed one
    :type num_attempts: int
    '''
    if response is not None and response.status == 503 and 'retry-after' in response.headers:
        try:
            return int(response.headers['retry-after'])
        except ValueError:
            # In RFC 2616, retry-after can be formatted as absolute time
            # instead of seconds to wait. We don't bother to parse that,
            # but the apiserver doesn't generate such responses anyway.
            pass
    if num_attempts <= 1:
        return 1
    num_attempts = min(num_attempts, 7)
    return randint(2 ** (num_attempts - 2), 2 ** (num_attempts - 1))
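
# Worked example of the backoff schedule above (a sketch of the arithmetic,
# not executed): for attempt numbers n = 2..7 the delay is drawn uniformly
# from [2**(n-2), 2**(n-1)] seconds, i.e. from the windows [1, 2], [2, 4],
# [4, 8], [8, 16], [16, 32], [32, 64]; attempts beyond 7 are clamped to the
# [32, 64] window. A 503 response carrying a Retry-After header overrides
# the computed delay entirely.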

# Truncate the message, if the error injection flag is on, and other
# conditions hold. This causes a BadRequest 400 HTTP code, which is
# subsequently retried.
#
# Note: the minimal upload size for S3 is 5MB. In theory, you are
# supposed to get an "EntityTooSmall" error from S3, which has a 400
# code. However, I have not observed such responses in practice.
# http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
def _maybe_truncate_request(url, data):
    MIN_UPLOAD_LEN = 16 * 1024
    if _INJECT_ERROR:
        if (randint(0, 9) == 0) and "upload" in url and len(data) > MIN_UPLOAD_LEN:
            logger.info("truncating upload data to length=%d", MIN_UPLOAD_LEN)
            return data[0:MIN_UPLOAD_LEN]
    return data


def _raise_error_for_testing(try_index=None, method='GET'):
    if _INJECT_ERROR and method == 'GET' and randint(0, 9) == 0:
        error_thrown = randint(0, 1)
        if error_thrown == 0 and try_index is None:
            raise exceptions.DXIncompleteReadsError()
        # Raise exception to test urllib3 error in downloads
        elif error_thrown == 1 and try_index is not None and try_index < 3:
            raise exceptions.UrllibInternalError()


def _debug_print_request(debug_level, seq_num, time_started, method, url, headers, jsonify_data, data):
    if debug_level >= 2:
        if not jsonify_data:
            if len(data) == 0:
                formatted_data = '""'
            else:
                formatted_data = "<file data of length " + str(len(data)) + ">"
        else:
            try:
                if _DEBUG >= 3:
                    formatted_data = json.dumps(data, indent=2)
                else:
                    formatted_data = json.dumps(data)
            except (UnicodeDecodeError, TypeError):
                formatted_data = "<binary data>"
        printable_headers = ''
        if 'Range' in headers:
            printable_headers = " " + json.dumps({"Range": headers["Range"]})
        print("%s [%f] %s %s%s => %s\n" % (YELLOW(BOLD(">%d" % seq_num)), time_started,
                                           BLUE(method), url, printable_headers, formatted_data),
              file=sys.stderr, end="")
    elif debug_level > 0:
        print("%s [%f] %s %s => %s\n" % (YELLOW(BOLD(">%d" % seq_num)), time_started,
                                         BLUE(method), url, Repr().repr(data)),
              file=sys.stderr, end="")


def _debug_print_response(debug_level, seq_num, time_started, req_id, response_status,
                          response_was_json, method, url, content):
    if debug_level > 0:
        if response_was_json:
            if debug_level >= 3:
                content_to_print = "\n  " + json.dumps(content, indent=2).replace("\n", "\n  ")
            elif debug_level == 2:
                content_to_print = json.dumps(content)
            else:
                content_to_print = Repr().repr(content)
        else:
            content_to_print = "(%d bytes)" % len(content) if len(content) > 0 else ''

        t = int((time.time() - time_started) * 1000)
        code_format = GREEN if (200 <= response_status < 300) else RED
        print("  " + YELLOW(BOLD("<%d" % seq_num)),
              "[%f]" % time_started,
              BLUE(method),
              req_id,
              url,
              "<=",
              code_format(str(response_status)),
              WHITE(BOLD("(%dms)" % t)),
              content_to_print,
              file=sys.stderr)


def DXHTTPRequest(resource, data, method='POST', headers=None, auth=True,
                  timeout=DEFAULT_TIMEOUT, use_compression=None, jsonify_data=True,
                  want_full_response=False, decode_response_body=True, prepend_srv=True,
                  session_handler=None, max_retries=DEFAULT_RETRIES, always_retry=False,
                  **kwargs):
    '''
    :param resource: API server route, e.g. "/record/new". If *prepend_srv*
        is False, a fully qualified URL is expected. If this argument is a
        callable, it will be called just before each request attempt, and
        expected to return a tuple (URL, headers). Headers returned by the
        callback are updated with *headers* (including headers set by this
        method).
    :type resource: string
    :param data: Content of the request body
    :type data: list or dict, if *jsonify_data* is True; or string or file-like object, otherwise
    :param headers: Names and values of HTTP headers to submit with the
        request (in addition to those needed for authentication,
        compression, or other options specified with the call).
    :type headers: dict
    :param auth:
        Controls the ``Authorization`` header or other means of
        authentication supplied with the request. If ``True`` (default), a
        token is obtained from the ``DX_SECURITY_CONTEXT``. If the value
        evaluates to false, no action is taken to prepare authentication
        for the request. Otherwise, the value is assumed to be callable,
        and called with three arguments (method, url, headers) and expected
        to prepare the authentication headers by reference.
    :type auth: tuple, object, True (default), or None
    :param timeout: HTTP request timeout, in seconds
    :type timeout: float
    :param use_compression: Deprecated
    :type use_compression: string or None
    :param jsonify_data: If True, *data* is converted from a Python list or
        dict to a JSON string
    :type jsonify_data: boolean
    :param want_full_response: If True, the full
        :class:`urllib3.response.HTTPResponse` object is returned
        (otherwise, only the content of the response body is returned)
    :type want_full_response: boolean
    :param decode_response_body: If True (and *want_full_response* is
        False), the response body is decoded and, if it is a JSON string,
        deserialized. Otherwise, the response body is uncompressed if
        transport compression is on, and returned raw.
    :type decode_response_body: boolean
    :param prepend_srv: If True, prepends the API server location to the URL
    :type prepend_srv: boolean
    :param session_handler: Deprecated.
    :param max_retries: Maximum number of retries to perform for a request.
        A "failed" request is retried if any of the following is true:

        - A response is received from the server, and the content length
          received does not match the "Content-Length" header.
        - A response is received from the server, and the response has an
          HTTP status code in the 5xx range.
        - A response is received from the server, the "Content-Length"
          header is not set, and the response JSON cannot be parsed.
        - No response is received from the server, and either
          *always_retry* is True or the request *method* is "GET".

    :type max_retries: int
    :param always_retry: If True, indicates that it is safe to retry a
        request on failure

        - Note: It is not guaranteed that the request will *always* be
          retried on failure; rather, this is an indication to the function
          that it would be safe to do so.

    :type always_retry: boolean
    :returns: Response from API server in the format indicated by
        *want_full_response* and *decode_response_body*.
    :raises: :exc:`exceptions.DXAPIError` or a subclass if the server
        returned a non-200 status code; :exc:`urllib3.exceptions.HTTPError`
        if an invalid response was received from the server; or
        :exc:`urllib3.exceptions.ConnectionError` if a connection cannot be
        established.

    Wrapper around :meth:`urllib3.request()` that makes an HTTP request,
    inserting authentication headers and (by default) converting *data* to
    JSON.

    .. note:: Bindings methods that make API calls make the underlying HTTP
       request(s) using :func:`DXHTTPRequest`, and most of them will pass
       any unrecognized keyword arguments you have supplied through to
       :func:`DXHTTPRequest`.
    '''
    if headers is None:
        headers = {}

    global _UPGRADE_NOTIFY

    seq_num = _get_sequence_number()

    url = APISERVER + resource if prepend_srv else resource
    method = method.upper()  # Convert method name to uppercase, to ease string comparisons later

    if auth is True:
        auth = AUTH_HELPER

    if auth:
        auth(_RequestForAuth(method, url, headers))

    pool_args = {arg: kwargs.pop(arg, None) for arg in ("verify", "cert_file", "key_file", "ssl_context")}
    test_retry = kwargs.pop("_test_retry_http_request", False)

    # data is a sequence/buffer or a dict
    # serialized_data is a sequence/buffer
    if jsonify_data:
        serialized_data = json.dumps(data)
        if 'Content-Type' not in headers and method == 'POST':
            headers['Content-Type'] = 'application/json'
    else:
        serialized_data = data

    # If the input is a buffer, its data gets consumed by
    # requests.request (moving the read position). Record the initial
    # buffer position so that we can return to it if the request fails
    # and needs to be retried.
    rewind_input_buffer_offset = None
    if hasattr(data, 'seek') and hasattr(data, 'tell'):
        rewind_input_buffer_offset = data.tell()

    # Maintain two separate counters for the number of tries:
    try_index = 0  # excluding 503 errors; cannot exceed (max_retries + 1)
    try_index_including_503 = 0  # including 503 errors; used to do exponential backoff

    retried_responses = []
    _url = None
    redirect_url = None
    while True:
        success, time_started = True, None
        response = None
        req_id = None
        try:
            time_started = time.time()
            _method, _url, _headers = _process_method_url_headers(method, url, headers)

            _debug_print_request(_DEBUG, seq_num, time_started, _method, _url, _headers, jsonify_data, data)

            body = _maybe_truncate_request(_url, serialized_data)

            # throws BadStatusLine if the server returns nothing
            try:
                pool_manager = _get_pool_manager(**pool_args)

                _headers['User-Agent'] = USER_AGENT
                _headers['DNAnexus-API'] = API_VERSION

                # Convert Unicode headers to ASCII, throwing an error if that is not possible
                def ensure_ascii(i):
                    if not isinstance(i, bytes):
                        i = i.encode('ascii')
                    return i

                _headers = {ensure_ascii(k): ensure_ascii(v) for k, v in _headers.items()}

                # This is needed for python 3 urllib
                _headers.pop(b'host', None)
                _headers.pop(b'content-length', None)
                _headers.pop(b'Content-Length', None)

                # The libraries downstream (http client) require elimination of non-ascii
                # chars from URL.
                # We check if the URL contains non-ascii characters to see if we need to
                # quote it. It is important not to always quote the path (here: parts[2])
                # since it might contain elements (e.g. HMAC for api proxy) containing
                # special characters that should not be quoted.
                try:
                    ensure_ascii(_url)
                    encoded_url = _url
                except UnicodeEncodeError:
                    import urllib.parse
                    parts = list(urllib.parse.urlparse(_url))
                    parts[2] = urllib.parse.quote(parts[2])
                    encoded_url = urllib.parse.urlunparse(parts)

                response = pool_manager.request(_method, encoded_url, headers=_headers, body=body,
                                                timeout=timeout, retries=False, **kwargs)
            except urllib3.exceptions.ClosedPoolError:
                # If another thread closed the pool before the request was
                # started, a ClosedPoolError is thrown
                raise exceptions.UrllibInternalError("ClosedPoolError")

            _raise_error_for_testing(try_index, method)
            req_id = response.headers.get("x-request-id", "unavailable")

            if (_UPGRADE_NOTIFY
                    and response.headers.get('x-upgrade-info', '').startswith('A recommended update is available')
                    and '_ARGCOMPLETE' not in os.environ):
                logger.info(response.headers['x-upgrade-info'])
                try:
                    with open(_UPGRADE_NOTIFY, 'a'):
                        os.utime(_UPGRADE_NOTIFY, None)
                except Exception:
                    pass
                _UPGRADE_NOTIFY = False

            # Handle redirection manually for symlink files
            if response.status // 100 == 3:
                redirect_url = response.headers.get('Location')
                if not redirect_url:
                    raise exceptions.UrllibInternalError("Location not found in redirect response",
                                                         response.status)
                break

            # If an HTTP code that is not in the 200 series is received and the
            # content is JSON, parse it and throw the appropriate error.
            # Otherwise, raise the usual exception.
            if response.status // 100 != 2:
                # response.headers key lookup is case-insensitive
                if response.headers.get('content-type', '').startswith('application/json'):
                    try:
                        content = response.data.decode('utf-8')
                    except AttributeError:
                        raise exceptions.UrllibInternalError("Content is none", response.status)
                    try:
                        content = json.loads(content)
                    except ValueError:
                        # The JSON is not parsable, but we should be able to retry.
                        raise exceptions.BadJSONInReply("Invalid JSON received from server", response.status)
                    try:
                        error_class = getattr(exceptions, content["error"]["type"], exceptions.DXAPIError)
                    except (KeyError, AttributeError, TypeError):
                        raise exceptions.HTTPErrorWithContent(
                            "Appropriate error class not found. [HTTPCode=%s]" % response.status, content)
                    raise error_class(content, response.status, time_started, req_id)
                else:
                    try:
                        content = response.data.decode('utf-8')
                    except AttributeError:
                        raise exceptions.UrllibInternalError("Content is none", response.status)
                    raise exceptions.HTTPErrorWithContent(
                        "{} {} [Time={} RequestID={}]".format(response.status, response.reason,
                                                              time_started, req_id),
                        content.strip())

            if want_full_response:
                return response
            else:
                if 'content-length' in response.headers:
                    if int(response.headers['content-length']) != len(response.data):
                        range_str = (' (%s)' % (headers['Range'],)) if 'Range' in headers else ''
                        raise exceptions.ContentLengthError(
                            "Received response with content-length header set to %s but content length is %d%s. "
                            "[Time=%f RequestID=%s]"
                            % (response.headers['content-length'], len(response.data), range_str,
                               time_started, req_id)
                        )

                content = response.data
                response_was_json = False

                if decode_response_body:
                    content = content.decode('utf-8')
                    if response.headers.get('content-type', '').startswith('application/json'):
                        try:
                            content = json.loads(content)
                        except ValueError:
                            # The JSON is not parsable, but we should be able to retry.
                            raise exceptions.BadJSONInReply("Invalid JSON received from server",
                                                            response.status)
                        else:
                            response_was_json = True

                req_id = response.headers.get('x-request-id') or "--"
                _debug_print_response(_DEBUG, seq_num, time_started, req_id, response.status,
                                      response_was_json, _method, _url, content)

                if test_retry:
                    retried_responses.append(content)
                    if len(retried_responses) == 1:
                        continue
                    else:
                        _set_retry_response(retried_responses[0])
                        return retried_responses[1]

                return content
            raise AssertionError('Should never reach this line: expected a result to have been returned by now')
        except Exception as e:
            # Avoid reusing connections in the pool, since they may be
            # in an inconsistent state (observed as "ResponseNotReady"
            # errors).
            _get_pool_manager(**pool_args).clear()
            success = False
            exception_msg = _extract_msg_from_last_exception()
            if isinstance(e, _expected_exceptions):
                # Total number of allowed tries is the initial try PLUS
                # up to (max_retries) subsequent retries.
                total_allowed_tries = max_retries + 1
                ok_to_retry = False
                is_retryable = always_retry or (method == 'GET') or _is_retryable_exception(e)
                # Because try_index is not incremented until we escape
                # this iteration of the loop, try_index is equal to the
                # number of tries that have failed so far, minus one.
                if try_index + 1 < total_allowed_tries:
                    # BadStatusLine --- server did not return anything
                    # BadJSONInReply --- server returned JSON that didn't parse properly
                    if response is None or isinstance(e, _RETRYABLE_WITH_RESPONSE):
                        ok_to_retry = is_retryable
                    else:
                        ok_to_retry = 500 <= response.status < 600

                    # The server has closed the connection prematurely
                    if (response is not None
                            and response.status == 400 and is_retryable and method == 'PUT'
                            and isinstance(e, urllib3.exceptions.HTTPError)):
                        request_timeout_str = '<Code>RequestTimeout</Code>'
                        if (request_timeout_str in exception_msg
                                or (isinstance(e, exceptions.HTTPErrorWithContent)
                                    and request_timeout_str in e.content)):
                            logger.info("Retrying 400 HTTP error, due to slow data transfer. "
                                        "Request Time=%f Request ID=%s", time_started, req_id)
                        else:
                            logger.info("400 HTTP error, of unknown origin, exception_msg=[%s]. "
                                        "Request Time=%f Request ID=%s", exception_msg, time_started, req_id)
                        ok_to_retry = True

                    # Unprocessable entity: the request has semantic errors
                    if response is not None and response.status == 422:
                        ok_to_retry = False

                if ok_to_retry:
                    if rewind_input_buffer_offset is not None:
                        data.seek(rewind_input_buffer_offset)

                    delay = _calculate_retry_delay(response, try_index_including_503 + 1)

                    range_str = (' (range=%s)' % (headers['Range'],)) if 'Range' in headers else ''
                    if response is not None and response.status == 503:
                        waiting_msg = 'Waiting %d seconds before retry...' % (delay,)
                    else:
                        waiting_msg = 'Waiting %d seconds before retry %d of %d...' % (
                            delay, try_index + 1, max_retries)

                    log_msg = "[%s] %s %s: %s. %s %s" % (time.ctime(), method, _url, exception_msg,
                                                         waiting_msg, range_str)
                    if isinstance(e, exceptions.HTTPErrorWithContent):
                        log_msg += "\n%s" % e.content
                    logger.warning(log_msg)

                    time.sleep(delay)
                    try_index_including_503 += 1
                    if response is None or response.status != 503:
                        try_index += 1
                    continue

            # All retries have been exhausted OR the error is deemed not
            # retryable. Print the latest error and propagate it back to the caller.
            if not isinstance(e, exceptions.DXAPIError):
                log_msg = "[%s] %s %s: %s." % (time.ctime(), method, _url, exception_msg)
                if isinstance(e, exceptions.HTTPErrorWithContent):
                    log_msg += "\n%s" % e.content
                logger.error(log_msg)

            # Retries have been exhausted, and we are unable to get a full
            # buffer from the data source. Raise a special exception.
            if isinstance(e, urllib3.exceptions.ProtocolError) and \
                    'Connection broken: IncompleteRead' in exception_msg:
                raise exceptions.DXIncompleteReadsError(exception_msg)

            raise
        finally:
            if success and try_index > 0:
                logger.info("[%s] %s %s: Recovered after %d retries", time.ctime(), method, _url, try_index)

        raise AssertionError('Should never reach this line: should have attempted a retry or reraised by now')

    # Make a new request to the URL specified in the Location header if we got a redirect_url
    if redirect_url:
        return DXHTTPRequest(redirect_url, body, method=method, headers=headers, auth=auth,
                             timeout=timeout, use_compression=use_compression, jsonify_data=jsonify_data,
                             want_full_response=want_full_response,
                             decode_response_body=decode_response_body, prepend_srv=prepend_srv,
                             session_handler=session_handler, max_retries=max_retries,
                             always_retry=always_retry, **kwargs)

    raise AssertionError('Should never reach this line: should never break out of loop')
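
# Example usage (a sketch; "/system/whoami" is a real DNAnexus API route,
# while the download URL is a hypothetical placeholder):
#
#   who = DXHTTPRequest("/system/whoami", {})  # JSON in, parsed JSON out
#   raw = DXHTTPRequest("https://example.com/some/file", '', method='GET',
#                       prepend_srv=False, jsonify_data=False,
#                       decode_response_body=False)  # raw bytes out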


class DXHTTPOAuth2:
    def __init__(self, security_context):
        self.security_context = security_context

    def __call__(self, r):
        if self.security_context["auth_token_type"].lower() == 'bearer':
            auth_header = self.security_context["auth_token_type"] + " " + self.security_context["auth_token"]
            r.headers[b'Authorization'] = auth_header.encode()
        else:
            raise NotImplementedError("Token types other than bearer are not yet supported")
        return r


def _dxhttp_read_range(url, headers, start_pos, end_pos, timeout, sub_range=True):
    '''
    Reads a part of an S3 object and returns the data. If there is an
    error, an exception is thrown.

    There is special handling for DXIncompleteReadsError, which is thrown
    when urllib3 receives only part of the requested range of the chunk.
    In that case the range is split into smaller sub-chunks, each sub-chunk
    is fetched with its own DXHTTPRequest, and the sub-chunks are
    concatenated to form the originally requested range. If a
    DXIncompleteReadsError is thrown (after retrying the sub-chunk 6 times)
    while reading a sub-chunk, then we fail.
    '''
    if sub_range:
        headers['Range'] = "bytes=" + str(start_pos) + "-" + str(end_pos)
    try:
        data = DXHTTPRequest(url, '', method='GET', headers=headers, auth=None, jsonify_data=False,
                             prepend_srv=False, always_retry=True, timeout=timeout,
                             decode_response_body=False)
        _raise_error_for_testing()
        return data
    # When a chunk fails to be read, it gets broken into sub-chunks
    except exceptions.DXIncompleteReadsError:
        chunk_buffer = StringIO()
        subchunk_len = int(math.ceil((end_pos - start_pos + 1) / INCOMPLETE_READS_NUM_SUBCHUNKS))
        subchunk_start_pos = start_pos
        while subchunk_start_pos <= end_pos:
            subchunk_end_pos = min(subchunk_start_pos + subchunk_len - 1, end_pos)
            headers['Range'] = "bytes=" + str(subchunk_start_pos) + "-" + str(subchunk_end_pos)
            subchunk_start_pos += subchunk_len
            data = DXHTTPRequest(url, '', method='GET', headers=headers, auth=None, jsonify_data=False,
                                 prepend_srv=False, always_retry=True, timeout=timeout,
                                 decode_response_body=False)
            # Concatenate sub-chunks
            chunk_buffer.write(data)
        concat_chunks = chunk_buffer.getvalue()
        chunk_buffer.close()
        return concat_chunks
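
# Example (a sketch; the URL is a hypothetical presigned download URL):
# fetch bytes 0-1023, transparently falling back to sub-chunked reads if
# the server delivers an incomplete body:
#
#   part = _dxhttp_read_range("https://example.com/file", {}, 0, 1023,
#                             timeout=DEFAULT_TIMEOUT)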


def set_api_server_info(host=None, port=None, protocol=None):
    '''
    :param host: API server hostname
    :type host: string
    :param port: API server port. If not specified, *port* is guessed based on *protocol*.
    :type port: string
    :param protocol: Either "http" or "https"
    :type protocol: string

    Overrides the current settings for which API server to communicate
    with. Any parameters that are not explicitly specified are not
    overridden.
    '''
    global APISERVER_PROTOCOL, APISERVER_HOST, APISERVER_PORT, APISERVER
    if host is not None:
        APISERVER_HOST = host
    if port is not None:
        APISERVER_PORT = port
    if protocol is not None:
        APISERVER_PROTOCOL = protocol

    if port is None or port == '':
        APISERVER = APISERVER_PROTOCOL + "://" + APISERVER_HOST
    else:
        APISERVER = APISERVER_PROTOCOL + "://" + APISERVER_HOST + ":" + str(APISERVER_PORT)


def set_security_context(security_context):
    '''
    :param security_context: Authentication hash, usually with keys
        ``auth_token_type`` set to ``Bearer`` and ``auth_token`` set to the
        authentication token.
    :type security_context: dict

    Sets the security context to use the provided token.
    '''
    global SECURITY_CONTEXT, AUTH_HELPER
    SECURITY_CONTEXT = security_context
    AUTH_HELPER = DXHTTPOAuth2(security_context)


def set_job_id(dxid):
    """
    :param dxid: ID of a job
    :type dxid: string

    Sets the ID of the running job.

    .. warning:: This function is only really useful if you are developing
       code that will run in and interact with the Execution Environment,
       but wish to test it outside of an actual Execution Environment.
    """
    global JOB_ID
    JOB_ID = dxid


def set_workspace_id(dxid):
    """
    :param dxid: ID of a project or workspace
    :type dxid: string

    Sets the default data container for object creation and modification
    to the specified project or workspace.
    """
    global WORKSPACE_ID
    WORKSPACE_ID = dxid


def set_project_context(dxid):
    """
    :param dxid: Project ID
    :type dxid: string

    Sets the project context for a running job.

    .. warning:: This function is only really useful if you are developing
       code that will run in and interact with the Execution Environment
       but wish to test it outside of an actual Execution Environment.

       It does not change the default data container in which new objects
       are created or name resolution is performed. If you want to do
       that, use :func:`set_workspace_id` instead.
    """
    global PROJECT_CONTEXT_ID
    PROJECT_CONTEXT_ID = dxid


def set_watch_port(port=None):
    """
    :param port: port to use for streaming job logs
    :type port: string

    Sets the port to use for streaming job logs via ``dx watch`` inside
    the Execution Environment.

    .. warning:: This function is only really useful if you are developing
       code that will run in and interact with the Execution Environment.
    """
    global WATCH_PORT
    WATCH_PORT = port


def get_auth_server_name(host_override=None, port_override=None, protocol='https'):
    """
    Chooses the auth server name from the currently configured API server name.

    Raises DXError if the auth server name cannot be guessed and the
    overrides are not provided (or improperly provided).
    """
    if host_override is not None or port_override is not None:
        if host_override is None or port_override is None:
            raise exceptions.DXError("Both host and port must be specified if either is specified")
        return protocol + '://' + host_override + ':' + str(port_override)
    elif APISERVER_HOST == 'stagingapi.dnanexus.com':
        return 'https://stagingauth.dnanexus.com'
    elif APISERVER_HOST == 'api.dnanexus.com':
        return 'https://auth.dnanexus.com'
    elif APISERVER_HOST == 'stagingapi.cn.dnanexus.com':
        return 'https://stagingauth.cn.dnanexus.com:7001'
    elif APISERVER_HOST == 'api.cn.dnanexus.com':
        return 'https://auth.cn.dnanexus.com:8001'
    elif APISERVER_HOST == "localhost" or APISERVER_HOST == "127.0.0.1":
        if "DX_AUTHSERVER_HOST" not in os.environ or "DX_AUTHSERVER_PORT" not in os.environ:
            err_msg = "Must set authserver env vars (DX_AUTHSERVER_HOST, DX_AUTHSERVER_PORT) if apiserver is {apiserver}."
            raise exceptions.DXError(err_msg.format(apiserver=APISERVER_HOST))
        else:
            return os.environ["DX_AUTHSERVER_HOST"] + ":" + os.environ["DX_AUTHSERVER_PORT"]
    else:
        err_msg = "Could not determine which auth server is associated with {apiserver}."
        raise exceptions.DXError(err_msg.format(apiserver=APISERVER_HOST))
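
# For example, with the default production API server configured:
#
#   get_auth_server_name()                          # -> 'https://auth.dnanexus.com'
#   get_auth_server_name('auth.example.com', 7001)  # -> 'https://auth.example.com:7001'
#
# (the override host and port above are placeholders)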


# The following is used for testing retry of an HTTP request. The caller
# can pass the argument "_test_retry_http_request"=1 to DXHTTPRequest to
# simulate a request that required a retry. The first response will be
# returned, and the second response can be retrieved by calling
# _get_retry_response.
_retry_response = None


def _set_retry_response(response):
    global _retry_response
    _retry_response = response


def _get_retry_response():
    return _retry_response


def append_underlying_workflow_describe(globalworkflow_desc):
    """
    Adds the "workflowDescribe" field to the config for each region of
    the global workflow. The value is the description of an underlying
    workflow in that region.
    """
    if not globalworkflow_desc or \
            globalworkflow_desc['class'] != 'globalworkflow' or \
            'regionalOptions' not in globalworkflow_desc:
        return globalworkflow_desc

    for region, config in globalworkflow_desc['regionalOptions'].items():
        workflow_id = config['workflow']
        workflow_desc = dxpy.api.workflow_describe(workflow_id,
                                                   input_params={"project": config["resources"]})
        globalworkflow_desc['regionalOptions'][region]['workflowDescribe'] = workflow_desc
    return globalworkflow_desc


from .utils.config import DXConfig as _DXConfig
config = _DXConfig()

from .bindings import *
from .dxlog import DXLogHandler
from .utils.exec_utils import run, entry_point