Module destination_based_sales.utils

This module defines several utility functions that are used throughout the other Python files.

Expand source code
"""
This module defines several utility functions that are used throughout the other Python files.
"""

########################################################################################################################
# --- Imports

import os
import json

import numpy as np


########################################################################################################################
# --- For IRS data

# Absolute path of the directory that contains this module; the JSON files read below are located in its
# "data" sub-directory.
path_to_dir = os.path.dirname(os.path.abspath(__file__))

path_to_codes_to_impute_IRS = os.path.join(path_to_dir, 'data', 'codes_to_impute_IRS.json')
path_to_codes_to_impute_BEA = os.path.join(path_to_dir, 'data', 'codes_to_impute_BEA.json')

# Both files are parsed once, when the module is imported. The encoding is stated explicitly: JSON files are
# UTF-8 encoded, whereas open() would otherwise fall back to the platform's locale encoding.
# NOTE(review): the parsed objects are presumably keyed by affiliate country name, as expected by
# impute_missing_codes - confirm against the JSON files.
with open(path_to_codes_to_impute_IRS, encoding='utf-8') as file:
    CODES_TO_IMPUTE_IRS = json.load(file)

with open(path_to_codes_to_impute_BEA, encoding='utf-8') as file:
    CODES_TO_IMPUTE_BEA = json.load(file)

# Continent code associated with each of the two aggregate codes 'OASIAOCN' and 'UKI'.
# NOTE(review): presumably passed as `mapping` to impute_missing_continent_codes when processing trade
# statistics - confirm against the callers.
CONTINENT_CODES_TO_IMPUTE_TRADE = {
    'OASIAOCN': 'APAC',
    'UKI': 'AMR'
}

# Continent code associated with a set of aggregate / regional codes.
# NOTE(review): presumably the counterpart of the mapping above for the OECD CbCR data - confirm against the
# callers.
CONTINENT_CODES_TO_IMPUTE_OECD_CBCR = {
    'OAF': 'AFR',
    'OAM': 'AMR',
    'OAS': 'APAC',
    'OTE': 'EUR',
    'AFRIC': 'AFR',
    'AMER': 'AMR',
    'ASIAT': 'APAC',
    'EUROP': 'EUR',
    'GRPS': 'OTHER_GROUPS',
    'UKI': 'AMR'
}

# Country codes that ensure_country_overlap_with_IRS and ensure_country_overlap_with_OECD_CbCR replace with
# 'UKI' when this list is given as their `UK_caribbean_islands` argument.
# NOTE(review): these look like ISO alpha-3 country codes - confirm.
UK_CARIBBEAN_ISLANDS = [
    'CYM',
    'VGB',
    'AIA',
    'MSR',
    'TCA'
]


def impute_missing_codes(row, column, codes_to_impute):
    """
    Return the value of `column` for `row`, taken from `codes_to_impute` when the country is listed there.

    Args:
        row: record supporting `row[key]` that holds 'AFFILIATE_COUNTRY_NAME' and `column`.
        column: key of the value to return.
        codes_to_impute: mapping from affiliate country name to a mapping from column to replacement value.

    Returns:
        `codes_to_impute[name][column]` if the row's affiliate country name is a key of `codes_to_impute`,
        `row[column]` otherwise.
    """
    country_name = row['AFFILIATE_COUNTRY_NAME']

    # Countries without an override keep the value already present in the row
    if country_name not in codes_to_impute:
        return row[column]

    return codes_to_impute[country_name][column]


########################################################################################################################
# --- For splitting revenue variables

def eliminate_irrelevant_percentages(row, column):
    """
    Return `row[column]`, or NaN when the matching 'IS_<sales type>_COMPLETE' indicator equals 0.

    Args:
        row: record supporting `row[key]` that holds `column` and the indicator column.
        column: column name whose second underscore-separated token is the sales type.

    Returns:
        `row[column]` if the indicator differs from 0, `np.nan` otherwise.
    """
    # The sales type is the second underscore-separated token of the column name
    tokens = column.split('_')
    indicator_column = f'IS_{tokens[1]}_COMPLETE'

    if row[indicator_column] != 0:
        return row[column]

    return np.nan


def impute_missing_values(row, column, imputations):
    """
    Return `row[column]`, replaced by the imputation of the row's continent when it is NaN.

    Args:
        row: record supporting `row[key]` that holds `column` and 'CONTINENT_CODE'.
        column: key of the numeric value to return.
        imputations: mapping from continent code to a mapping from column to imputed value.

    Returns:
        `row[column]` if it is not NaN, `imputations[row['CONTINENT_CODE']][column]` otherwise.
    """
    value = row[column]

    if not np.isnan(value):
        return value

    return imputations[row['CONTINENT_CODE']][column]


# FOR TRADE STATISTICS (AND OECD CBCR FOR THE FIRST FUNCTION)

def impute_missing_continent_codes(row, mapping):
    """
    Return the continent code of `row`, looked up in `mapping` when it is missing (NaN).

    Args:
        row: record exposing `.index` and `row[key]` (e.g. a pandas Series) that holds 'CONTINENT_CODE' and
            either 'CODE' or 'AFFILIATE_COUNTRY_CODE'.
        mapping: mapping from country code to continent code.

    Returns:
        `row['CONTINENT_CODE']` when it is a string or a non-NaN value; otherwise the entry of `mapping` for
        `row['CODE']` if 'CODE' is one of the row's labels, or for `row['AFFILIATE_COUNTRY_CODE']` if not.
    """
    continent_code = row['CONTINENT_CODE']

    # Nothing to impute when the continent code is already available
    if isinstance(continent_code, str) or not np.isnan(continent_code):
        return continent_code

    if 'CODE' not in row.index:
        return mapping[row['AFFILIATE_COUNTRY_CODE']]

    country_code = row['CODE']

    # Print the row when the country code is itself NaN, to help diagnose the lookup that follows
    if isinstance(country_code, float) and np.isnan(country_code):
        print(row)

    return mapping[country_code]


def ensure_country_overlap_with_IRS(row, unique_IRS_country_codes, UK_caribbean_islands):
    """
    Return the partner country code of `row`, restricted to the codes listed in `unique_IRS_country_codes`.

    Args:
        row: record supporting `row[key]` that holds 'OTHER_COUNTRY_CODE' and 'CONTINENT_CODE'.
        unique_IRS_country_codes: collection of country codes that are kept unchanged.
        UK_caribbean_islands: collection of country codes that are replaced with 'UKI'.

    Returns:
        'UKI' for a code of `UK_caribbean_islands`; the code itself if it is in `unique_IRS_country_codes` or
        equal to 'RWD'; otherwise the aggregate code associated with the row's continent code.

    Raises:
        KeyError: if the continent-based fallback is needed and the continent code is not one of
            'EUR', 'AFR', 'ASIA', 'SAMR', 'NAMR', 'OCN'.
    """
    partner_code = row['OTHER_COUNTRY_CODE']

    if partner_code in UK_caribbean_islands:
        return 'UKI'

    if partner_code in unique_IRS_country_codes or partner_code == 'RWD':
        return partner_code

    # Remaining countries are folded into the aggregate code of their continent
    aggregate_code_by_continent = {
        'EUR': 'OEUR',
        'AFR': 'OAFR',
        'ASIA': 'OASIAOCN',
        'OCN': 'OASIAOCN',
        'SAMR': 'OAMR',
        'NAMR': 'OAMR',
    }

    return aggregate_code_by_continent[row['CONTINENT_CODE']]


def ensure_country_overlap_with_OECD_CbCR(row, unique_OECD_country_codes, UK_caribbean_islands):
    """
    Return the partner country code of `row`, restricted to the codes listed in `unique_OECD_country_codes`.

    Args:
        row: record supporting `row[key]` that holds 'OTHER_COUNTRY_CODE' and 'CONTINENT_CODE'.
        unique_OECD_country_codes: collection of country codes that are kept unchanged.
        UK_caribbean_islands: collection of country codes that are replaced with 'UKI'.

    Returns:
        'UKI' for a code of `UK_caribbean_islands`; the code itself if it is in `unique_OECD_country_codes` or
        equal to 'RWD'; otherwise the aggregate code associated with the row's continent code.

    Raises:
        KeyError: if the continent-based fallback is needed and the continent code is not one of
            'EUR', 'AFR', 'ASIA', 'SAMR', 'NAMR', 'OCN'.
    """
    partner_code = row['OTHER_COUNTRY_CODE']

    if partner_code in UK_caribbean_islands:
        return 'UKI'

    is_kept_as_is = partner_code in unique_OECD_country_codes or partner_code == 'RWD'

    if is_kept_as_is:
        return partner_code

    # Remaining countries are folded into the aggregate code of their continent
    aggregate_code_by_continent = {
        'EUR': 'OEUR',
        'AFR': 'OAFR',
        'ASIA': 'OASIAOCN',
        'OCN': 'OASIAOCN',
        'SAMR': 'OAMR',
        'NAMR': 'OAMR',
    }

    return aggregate_code_by_continent[row['CONTINENT_CODE']]


class ServicesDataTransformer:
    """
    Redistribute the 'RWD' services exports of each affiliate country over the aggregate partner codes.

    `fit` records, for each affiliate country, the amount reported against the 'RWD' partner code and the share
    of each aggregate code (see `list_of_OTHER_codes`) in the exports to these aggregates. `transform` drops the
    'RWD' rows and adds the recorded amount to the aggregate rows in proportion to these shares.
    """

    def __init__(self):
        # Affiliate country code -> 'SERVICES_EXPORTS' of its first 'RWD' row (0 when it has none)
        self.amounts_to_distribute = {}

        # Affiliate country code -> {aggregate code -> share of the 'RWD' amount allocated to it}
        self.allocations = {}
        self.list_of_OTHER_codes = ['OAFR', 'OAMR', 'OASIAOCN', 'OEUR']

    def fit(self, data):
        """
        Record the amount to distribute and the allocation shares of each affiliate country.

        Args:
            data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
                'SERVICES_EXPORTS'.

        The shares are uniform over `list_of_OTHER_codes` when they cannot be derived from `data`, i.e. when
        the country has no aggregate row or when the exports of its aggregate rows sum to 0 (dividing by this
        total would yield NaN shares).
        """
        other_codes = self.list_of_OTHER_codes

        # These two masks do not depend on the affiliate country, so they are computed only once
        mask_RWD = data['OTHER_COUNTRY_CODE'] == 'RWD'
        mask_OTHER = data['OTHER_COUNTRY_CODE'].isin(other_codes)

        for country in data['AFFILIATE_COUNTRY_CODE'].unique():
            mask_affiliate_country = data['AFFILIATE_COUNTRY_CODE'] == country

            rwd_rows = data[mask_affiliate_country & mask_RWD]

            # Only the first 'RWD' row of the country is considered
            self.amounts_to_distribute[country] = (
                rwd_rows['SERVICES_EXPORTS'].iloc[0] if not rwd_rows.empty else 0
            )

            other_rows = data[mask_affiliate_country & mask_OTHER]
            total_exports = other_rows['SERVICES_EXPORTS'].sum()

            if other_rows.empty or total_exports == 0:
                self.allocations[country] = {code: 1 / len(other_codes) for code in other_codes}
                continue

            # For each aggregate code, keep the share of its first row; codes without any row get 0
            first_share_by_code = {}

            for code, share in zip(other_rows['OTHER_COUNTRY_CODE'], other_rows['SERVICES_EXPORTS'] / total_exports):
                first_share_by_code.setdefault(code, share)

            self.allocations[country] = {code: first_share_by_code.get(code, 0) for code in other_codes}

    def transform(self, data):
        """
        Drop the 'RWD' rows of `data` and add the fitted amounts to the rows of the aggregate codes.

        Args:
            data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
                'SERVICES_EXPORTS'.

        Returns:
            A new DataFrame, with a fresh 0-based index, in which the 'SERVICES_EXPORTS' of each aggregate row
            is increased by the country's amount to distribute multiplied by the share of the aggregate code.

        Raises:
            KeyError: if an aggregate row refers to an affiliate country that was not seen by `fit`.
        """
        data = data[data['OTHER_COUNTRY_CODE'] != 'RWD'].copy()

        # Nothing to adjust; this also avoids a row-wise apply over an empty frame, whose result is not a
        # column of per-row values
        if data.empty:
            return data.reset_index(drop=True)

        def add_allocated_amount(row):
            partner_code = row['OTHER_COUNTRY_CODE']

            if partner_code not in self.list_of_OTHER_codes:
                return row['SERVICES_EXPORTS']

            country = row['AFFILIATE_COUNTRY_CODE']

            return row['SERVICES_EXPORTS'] + self.amounts_to_distribute[country] * self.allocations[country][partner_code]

        data['SERVICES_EXPORTS'] = data.apply(add_allocated_amount, axis=1)

        return data.reset_index(drop=True)


########################################################################################################################
# --- For Analytical AMNE data

def compute_foreign_owned_gross_output(row, include_US):
    """
    Sum the amount columns of `row`, leaving out the column of the country that the row describes.

    Every label of `row.index` is treated as an amount to add up, except 'cou', 'GROSS_OUTPUT_INCL_US' and the
    label equal to `row['cou']` (presumably the domestically-owned gross output - confirm against the layout of
    the Analytical AMNE table).

    Args:
        row: record exposing `.index` and `row[key]` (e.g. a pandas Series) that holds 'cou' and the amounts.
        include_US: if truthy, the 'USA' column is kept in the total; otherwise it is left out.

    Returns:
        The total of the retained columns.

    Raises:
        KeyError: if `include_US` is falsy, `row['cou']` is not 'USA' and `row` has no 'USA' label.
    """
    host_country = row['cou']
    excluded_columns = ('cou', 'GROSS_OUTPUT_INCL_US', host_country)

    foreign_owned_gross_output = sum(row[column] for column in row.index if column not in excluded_columns)

    # When the row describes the USA itself, the 'USA' column has already been skipped above: subtracting it
    # would remove it a second time
    if include_US or host_country == 'USA':
        return foreign_owned_gross_output

    return foreign_owned_gross_output - row['USA']

Functions

def compute_foreign_owned_gross_output(row, include_US)
Expand source code
def compute_foreign_owned_gross_output(row, include_US):
    """
    Sum the amount columns of `row`, leaving out the column of the country that the row describes.

    Every label of `row.index` is treated as an amount to add up, except 'cou', 'GROSS_OUTPUT_INCL_US' and the
    label equal to `row['cou']` (presumably the domestically-owned gross output - confirm against the layout of
    the Analytical AMNE table).

    Args:
        row: record exposing `.index` and `row[key]` (e.g. a pandas Series) that holds 'cou' and the amounts.
        include_US: if truthy, the 'USA' column is kept in the total; otherwise it is left out.

    Returns:
        The total of the retained columns.

    Raises:
        KeyError: if `include_US` is falsy, `row['cou']` is not 'USA' and `row` has no 'USA' label.
    """
    host_country = row['cou']
    excluded_columns = ('cou', 'GROSS_OUTPUT_INCL_US', host_country)

    foreign_owned_gross_output = sum(row[column] for column in row.index if column not in excluded_columns)

    # When the row describes the USA itself, the 'USA' column has already been skipped above: subtracting it
    # would remove it a second time
    if include_US or host_country == 'USA':
        return foreign_owned_gross_output

    return foreign_owned_gross_output - row['USA']
def eliminate_irrelevant_percentages(row, column)
Expand source code
def eliminate_irrelevant_percentages(row, column):
    """
    Return `row[column]`, or NaN when the matching 'IS_<sales type>_COMPLETE' indicator equals 0.

    Args:
        row: record supporting `row[key]` that holds `column` and the indicator column.
        column: column name whose second underscore-separated token is the sales type.

    Returns:
        `row[column]` if the indicator differs from 0, `np.nan` otherwise.
    """
    # The sales type is the second underscore-separated token of the column name
    tokens = column.split('_')
    indicator_column = f'IS_{tokens[1]}_COMPLETE'

    if row[indicator_column] != 0:
        return row[column]

    return np.nan
def ensure_country_overlap_with_IRS(row, unique_IRS_country_codes, UK_caribbean_islands)
Expand source code
def ensure_country_overlap_with_IRS(row, unique_IRS_country_codes, UK_caribbean_islands):
    """
    Return the partner country code of `row`, restricted to the codes listed in `unique_IRS_country_codes`.

    Args:
        row: record supporting `row[key]` that holds 'OTHER_COUNTRY_CODE' and 'CONTINENT_CODE'.
        unique_IRS_country_codes: collection of country codes that are kept unchanged.
        UK_caribbean_islands: collection of country codes that are replaced with 'UKI'.

    Returns:
        'UKI' for a code of `UK_caribbean_islands`; the code itself if it is in `unique_IRS_country_codes` or
        equal to 'RWD'; otherwise the aggregate code associated with the row's continent code.

    Raises:
        KeyError: if the continent-based fallback is needed and the continent code is not one of
            'EUR', 'AFR', 'ASIA', 'SAMR', 'NAMR', 'OCN'.
    """
    partner_code = row['OTHER_COUNTRY_CODE']

    if partner_code in UK_caribbean_islands:
        return 'UKI'

    if partner_code in unique_IRS_country_codes or partner_code == 'RWD':
        return partner_code

    # Remaining countries are folded into the aggregate code of their continent
    aggregate_code_by_continent = {
        'EUR': 'OEUR',
        'AFR': 'OAFR',
        'ASIA': 'OASIAOCN',
        'OCN': 'OASIAOCN',
        'SAMR': 'OAMR',
        'NAMR': 'OAMR',
    }

    return aggregate_code_by_continent[row['CONTINENT_CODE']]
def ensure_country_overlap_with_OECD_CbCR(row, unique_OECD_country_codes, UK_caribbean_islands)
Expand source code
def ensure_country_overlap_with_OECD_CbCR(row, unique_OECD_country_codes, UK_caribbean_islands):
    """
    Return the partner country code of `row`, restricted to the codes listed in `unique_OECD_country_codes`.

    Args:
        row: record supporting `row[key]` that holds 'OTHER_COUNTRY_CODE' and 'CONTINENT_CODE'.
        unique_OECD_country_codes: collection of country codes that are kept unchanged.
        UK_caribbean_islands: collection of country codes that are replaced with 'UKI'.

    Returns:
        'UKI' for a code of `UK_caribbean_islands`; the code itself if it is in `unique_OECD_country_codes` or
        equal to 'RWD'; otherwise the aggregate code associated with the row's continent code.

    Raises:
        KeyError: if the continent-based fallback is needed and the continent code is not one of
            'EUR', 'AFR', 'ASIA', 'SAMR', 'NAMR', 'OCN'.
    """
    partner_code = row['OTHER_COUNTRY_CODE']

    if partner_code in UK_caribbean_islands:
        return 'UKI'

    is_kept_as_is = partner_code in unique_OECD_country_codes or partner_code == 'RWD'

    if is_kept_as_is:
        return partner_code

    # Remaining countries are folded into the aggregate code of their continent
    aggregate_code_by_continent = {
        'EUR': 'OEUR',
        'AFR': 'OAFR',
        'ASIA': 'OASIAOCN',
        'OCN': 'OASIAOCN',
        'SAMR': 'OAMR',
        'NAMR': 'OAMR',
    }

    return aggregate_code_by_continent[row['CONTINENT_CODE']]
def impute_missing_codes(row, column, codes_to_impute)
Expand source code
def impute_missing_codes(row, column, codes_to_impute):
    """
    Return the value of `column` for `row`, taken from `codes_to_impute` when the country is listed there.

    Args:
        row: record supporting `row[key]` that holds 'AFFILIATE_COUNTRY_NAME' and `column`.
        column: key of the value to return.
        codes_to_impute: mapping from affiliate country name to a mapping from column to replacement value.

    Returns:
        `codes_to_impute[name][column]` if the row's affiliate country name is a key of `codes_to_impute`,
        `row[column]` otherwise.
    """
    country_name = row['AFFILIATE_COUNTRY_NAME']

    # Countries without an override keep the value already present in the row
    if country_name not in codes_to_impute:
        return row[column]

    return codes_to_impute[country_name][column]
def impute_missing_continent_codes(row, mapping)
Expand source code
def impute_missing_continent_codes(row, mapping):
    """
    Return the continent code of `row`, looked up in `mapping` when it is missing (NaN).

    Args:
        row: record exposing `.index` and `row[key]` (e.g. a pandas Series) that holds 'CONTINENT_CODE' and
            either 'CODE' or 'AFFILIATE_COUNTRY_CODE'.
        mapping: mapping from country code to continent code.

    Returns:
        `row['CONTINENT_CODE']` when it is a string or a non-NaN value; otherwise the entry of `mapping` for
        `row['CODE']` if 'CODE' is one of the row's labels, or for `row['AFFILIATE_COUNTRY_CODE']` if not.
    """
    continent_code = row['CONTINENT_CODE']

    # Nothing to impute when the continent code is already available
    if isinstance(continent_code, str) or not np.isnan(continent_code):
        return continent_code

    if 'CODE' not in row.index:
        return mapping[row['AFFILIATE_COUNTRY_CODE']]

    country_code = row['CODE']

    # Print the row when the country code is itself NaN, to help diagnose the lookup that follows
    if isinstance(country_code, float) and np.isnan(country_code):
        print(row)

    return mapping[country_code]
def impute_missing_values(row, column, imputations)
Expand source code
def impute_missing_values(row, column, imputations):
    """
    Return `row[column]`, replaced by the imputation of the row's continent when it is NaN.

    Args:
        row: record supporting `row[key]` that holds `column` and 'CONTINENT_CODE'.
        column: key of the numeric value to return.
        imputations: mapping from continent code to a mapping from column to imputed value.

    Returns:
        `row[column]` if it is not NaN, `imputations[row['CONTINENT_CODE']][column]` otherwise.
    """
    value = row[column]

    if not np.isnan(value):
        return value

    return imputations[row['CONTINENT_CODE']][column]

Classes

class ServicesDataTransformer
Expand source code
class ServicesDataTransformer:
    """
    Redistribute the 'RWD' services exports of each affiliate country over the aggregate partner codes.

    `fit` records, for each affiliate country, the amount reported against the 'RWD' partner code and the share
    of each aggregate code (see `list_of_OTHER_codes`) in the exports to these aggregates. `transform` drops the
    'RWD' rows and adds the recorded amount to the aggregate rows in proportion to these shares.
    """

    def __init__(self):
        # Affiliate country code -> 'SERVICES_EXPORTS' of its first 'RWD' row (0 when it has none)
        self.amounts_to_distribute = {}

        # Affiliate country code -> {aggregate code -> share of the 'RWD' amount allocated to it}
        self.allocations = {}
        self.list_of_OTHER_codes = ['OAFR', 'OAMR', 'OASIAOCN', 'OEUR']

    def fit(self, data):
        """
        Record the amount to distribute and the allocation shares of each affiliate country.

        Args:
            data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
                'SERVICES_EXPORTS'.

        The shares are uniform over `list_of_OTHER_codes` when they cannot be derived from `data`, i.e. when
        the country has no aggregate row or when the exports of its aggregate rows sum to 0 (dividing by this
        total would yield NaN shares).
        """
        other_codes = self.list_of_OTHER_codes

        # These two masks do not depend on the affiliate country, so they are computed only once
        mask_RWD = data['OTHER_COUNTRY_CODE'] == 'RWD'
        mask_OTHER = data['OTHER_COUNTRY_CODE'].isin(other_codes)

        for country in data['AFFILIATE_COUNTRY_CODE'].unique():
            mask_affiliate_country = data['AFFILIATE_COUNTRY_CODE'] == country

            rwd_rows = data[mask_affiliate_country & mask_RWD]

            # Only the first 'RWD' row of the country is considered
            self.amounts_to_distribute[country] = (
                rwd_rows['SERVICES_EXPORTS'].iloc[0] if not rwd_rows.empty else 0
            )

            other_rows = data[mask_affiliate_country & mask_OTHER]
            total_exports = other_rows['SERVICES_EXPORTS'].sum()

            if other_rows.empty or total_exports == 0:
                self.allocations[country] = {code: 1 / len(other_codes) for code in other_codes}
                continue

            # For each aggregate code, keep the share of its first row; codes without any row get 0
            first_share_by_code = {}

            for code, share in zip(other_rows['OTHER_COUNTRY_CODE'], other_rows['SERVICES_EXPORTS'] / total_exports):
                first_share_by_code.setdefault(code, share)

            self.allocations[country] = {code: first_share_by_code.get(code, 0) for code in other_codes}

    def transform(self, data):
        """
        Drop the 'RWD' rows of `data` and add the fitted amounts to the rows of the aggregate codes.

        Args:
            data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
                'SERVICES_EXPORTS'.

        Returns:
            A new DataFrame, with a fresh 0-based index, in which the 'SERVICES_EXPORTS' of each aggregate row
            is increased by the country's amount to distribute multiplied by the share of the aggregate code.

        Raises:
            KeyError: if an aggregate row refers to an affiliate country that was not seen by `fit`.
        """
        data = data[data['OTHER_COUNTRY_CODE'] != 'RWD'].copy()

        # Nothing to adjust; this also avoids a row-wise apply over an empty frame, whose result is not a
        # column of per-row values
        if data.empty:
            return data.reset_index(drop=True)

        def add_allocated_amount(row):
            partner_code = row['OTHER_COUNTRY_CODE']

            if partner_code not in self.list_of_OTHER_codes:
                return row['SERVICES_EXPORTS']

            country = row['AFFILIATE_COUNTRY_CODE']

            return row['SERVICES_EXPORTS'] + self.amounts_to_distribute[country] * self.allocations[country][partner_code]

        data['SERVICES_EXPORTS'] = data.apply(add_allocated_amount, axis=1)

        return data.reset_index(drop=True)

Methods

def fit(self, data)
Expand source code
def fit(self, data):
    """
    Record the amount to distribute and the allocation shares of each affiliate country.

    Args:
        data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
            'SERVICES_EXPORTS'.

    For each affiliate country, `self.amounts_to_distribute` receives the 'SERVICES_EXPORTS' of its first 'RWD'
    row (0 when it has none) and `self.allocations` receives the share of each code of
    `self.list_of_OTHER_codes`. The shares are uniform over these codes when they cannot be derived from
    `data`, i.e. when the country has no row for these codes or when the exports of these rows sum to 0
    (dividing by this total would yield NaN shares).
    """
    other_codes = self.list_of_OTHER_codes

    # These two masks do not depend on the affiliate country, so they are computed only once
    mask_RWD = data['OTHER_COUNTRY_CODE'] == 'RWD'
    mask_OTHER = data['OTHER_COUNTRY_CODE'].isin(other_codes)

    for country in data['AFFILIATE_COUNTRY_CODE'].unique():
        mask_affiliate_country = data['AFFILIATE_COUNTRY_CODE'] == country

        rwd_rows = data[mask_affiliate_country & mask_RWD]

        # Only the first 'RWD' row of the country is considered
        self.amounts_to_distribute[country] = (
            rwd_rows['SERVICES_EXPORTS'].iloc[0] if not rwd_rows.empty else 0
        )

        other_rows = data[mask_affiliate_country & mask_OTHER]
        total_exports = other_rows['SERVICES_EXPORTS'].sum()

        if other_rows.empty or total_exports == 0:
            self.allocations[country] = {code: 1 / len(other_codes) for code in other_codes}
            continue

        # For each code, keep the share of its first row; codes without any row get 0
        first_share_by_code = {}

        for code, share in zip(other_rows['OTHER_COUNTRY_CODE'], other_rows['SERVICES_EXPORTS'] / total_exports):
            first_share_by_code.setdefault(code, share)

        self.allocations[country] = {code: first_share_by_code.get(code, 0) for code in other_codes}
def transform(self, data)
Expand source code
def transform(self, data):
    """
    Drop the 'RWD' rows of `data` and add the fitted amounts to the rows of the aggregate codes.

    Args:
        data: pandas DataFrame with the columns 'AFFILIATE_COUNTRY_CODE', 'OTHER_COUNTRY_CODE' and
            'SERVICES_EXPORTS'.

    Returns:
        A new DataFrame, with a fresh 0-based index, in which the 'SERVICES_EXPORTS' of each row whose partner
        code is in `self.list_of_OTHER_codes` is increased by the country's entry of
        `self.amounts_to_distribute` multiplied by the matching entry of `self.allocations`.

    Raises:
        KeyError: if such a row refers to an affiliate country missing from `self.amounts_to_distribute` or
            `self.allocations`.
    """
    data = data[data['OTHER_COUNTRY_CODE'] != 'RWD'].copy()

    # Nothing to adjust; this also avoids a row-wise apply over an empty frame, whose result is not a column
    # of per-row values
    if data.empty:
        return data.reset_index(drop=True)

    def add_allocated_amount(row):
        partner_code = row['OTHER_COUNTRY_CODE']

        if partner_code not in self.list_of_OTHER_codes:
            return row['SERVICES_EXPORTS']

        country = row['AFFILIATE_COUNTRY_CODE']

        return row['SERVICES_EXPORTS'] + self.amounts_to_distribute[country] * self.allocations[country][partner_code]

    data['SERVICES_EXPORTS'] = data.apply(add_allocated_amount, axis=1)

    return data.reset_index(drop=True)