Module destination_based_sales.global_sales_calculator

Following the model of "analyses.py", this module is the central module for the study of non-US multinational companies' destination-based sales. It builds upon the logic encapsulated in "oecd_cbcr.py", "analytical_amne.py" and "trade_ statistics.py" to output the destination-based mapping of their worldwide revenues; this is covered in the "GlobalSales- Calculator" Python class. Additionally, the "GlobalAnalysisProvider" class allows to reproduce the analyses that can be found in the PDF report. In particular, it allows to re-estimate the revenue gains from the unilateral scenario of Baraké et al. (2021).

Expand source code
"""
Following the model of "analyses.py", this module is the central module for the study of non-US multinational companies'
destination-based sales. It builds upon the logic encapsulated in "oecd_cbcr.py", "analytical_amne.py" and "trade_
statistics.py" to output the destination-based mapping of their worldwide revenues; this is covered in the "GlobalSales-
Calculator" Python class. Additionally, the "GlobalAnalysisProvider" class allows to reproduce the analyses that can be
found in the PDF report. In particular, it allows to re-estimate the revenue gains from the unilateral scenario of
Baraké et al. (2021).
"""


########################################################################################################################
# --- Imports

import os

import numpy as np
import pandas as pd

from destination_based_sales.analytical_amne import AnalyticalAMNEPreprocessor
from destination_based_sales.oecd_cbcr import CbCRPreprocessor
from destination_based_sales.trade_statistics import TradeStatisticsProcessor
from destination_based_sales.analyses import SalesCalculator


########################################################################################################################
# --- Diverse

path_to_dir = os.path.dirname(os.path.abspath(__file__))

path_to_tax_deficits = os.path.join(path_to_dir, 'data', 'total_tax_deficits.xlsx')
path_to_EU_countries = os.path.join(path_to_dir, 'data', 'listofeucountries_csv.csv')

eu_28_country_codes = pd.read_csv(path_to_EU_countries, delimiter=';')
eu_28_country_codes = list(eu_28_country_codes['Alpha-3 code'].unique())
eu_27_country_codes = [c for c in eu_28_country_codes if c not in ['GBR', 'BGR', 'HRV', 'ROU', 'LTU']]


########################################################################################################################
# --- Content

class GlobalSalesCalculator:

    def __init__(self, winsorize_export_percs=True):

        self.aamne_preprocessor = AnalyticalAMNEPreprocessor()

        self.oecd_preprocessor = CbCRPreprocessor()
        self.oecd = self.oecd_preprocessor.get_preprocessed_revenue_data()

        self.trade_stat_processor = TradeStatisticsProcessor(
            year=2016,
            winsorize_export_percs=winsorize_export_percs,
            US_only=False
        )

        self.US_sales_calculator = SalesCalculator(year=2016)

        self.basic_revenue_columns = ['RELATED_PARTY_REVENUES', 'TOTAL_REVENUES', 'UNRELATED_PARTY_REVENUES'].copy()

        self.geo_columns = [
            'PARENT_COUNTRY_CODE', 'PARENT_COUNTRY_NAME', 'AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME'
        ].copy()

    def get_foreign_sales_split(self):

        foreign_aamne_data = self.aamne_preprocessor.get_extended_foreign_analytical_amne_data()

        oecd_foreign = self.oecd[
            self.oecd['PARENT_COUNTRY_CODE'] != self.oecd['AFFILIATE_COUNTRY_CODE']
        ].copy()

        merged_df_foreign = oecd_foreign.merge(
            foreign_aamne_data,
            how='left',
            on='AFFILIATE_COUNTRY_CODE'
        )

        for revenue_column in self.basic_revenue_columns:
            for perc_column in ['PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY', 'PERC_TO_OTHER_COUNTRY']:

                suffix = perc_column.replace('PERC', '')

                merged_df_foreign[revenue_column + suffix] = \
                    merged_df_foreign[revenue_column] * merged_df_foreign[perc_column]

        merged_df_foreign.drop(
            columns=[
                'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
                'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
                'PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY',
                'PERC_TO_OTHER_COUNTRY'
            ],
            inplace=True
        )

        return merged_df_foreign.copy()

    def get_domestic_sales_split(self):

        domestic_aamne_data = self.aamne_preprocessor.get_extended_domestic_analytical_amne_data()

        oecd_domestic = self.oecd[
            self.oecd['PARENT_COUNTRY_CODE'] == self.oecd['AFFILIATE_COUNTRY_CODE']
        ].copy()

        merged_df_domestic = oecd_domestic.merge(
            domestic_aamne_data,
            how='left',
            on='PARENT_COUNTRY_CODE'
        )

        for revenue_column in self.basic_revenue_columns:
            merged_df_domestic[revenue_column + '_TO_HEADQUARTER_COUNTRY'] = \
                merged_df_domestic['PERC_DOMESTIC_SALES'] * merged_df_domestic[revenue_column]

            merged_df_domestic[revenue_column + '_TO_AFFILIATE_COUNTRY'] = 0

            merged_df_domestic[revenue_column + '_TO_OTHER_COUNTRY'] = \
                merged_df_domestic['PERC_TO_OTHER_COUNTRY'] * merged_df_domestic[revenue_column]

        merged_df_domestic.drop(
            columns=[
                'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
                'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
                'PERC_DOMESTIC_SALES', 'PERC_TO_OTHER_COUNTRY'
            ],
            inplace=True
        )

        return merged_df_domestic.copy()

    def get_complete_revenue_split(self):

        merged_df_foreign = self.get_foreign_sales_split()
        merged_df_domestic = self.get_domestic_sales_split()

        complete_df = pd.concat([merged_df_domestic, merged_df_foreign], axis=0)

        headquarter_country_columns = []
        affiliate_country_columns = []
        other_country_columns = []

        for column in complete_df.columns:
            if column in self.geo_columns:
                continue

            elif 'HEADQUARTER' in column:
                headquarter_country_columns.append(column)

            elif 'AFFILIATE' in column:
                affiliate_country_columns.append(column)

            else:
                other_country_columns.append(column)

        headquarter_df = complete_df[self.geo_columns + headquarter_country_columns].copy()
        affiliate_df = complete_df[self.geo_columns + affiliate_country_columns].copy()
        other_country_df = complete_df[self.geo_columns + other_country_columns].copy()

        return headquarter_df, affiliate_df, other_country_df

    def get_already_attributed_sales(self):

        headquarter_df, affiliate_df, _ = self.get_complete_revenue_split()

        headquarter_df['ULTIMATE_DESTINATION_CODE'] = headquarter_df['PARENT_COUNTRY_CODE'].values
        headquarter_df['ULTIMATE_DESTINATION_NAME'] = headquarter_df['PARENT_COUNTRY_NAME'].values

        affiliate_df['ULTIMATE_DESTINATION_CODE'] = affiliate_df['AFFILIATE_COUNTRY_CODE'].values
        affiliate_df['ULTIMATE_DESTINATION_NAME'] = affiliate_df['AFFILIATE_COUNTRY_NAME'].values

        headquarter_df.rename(
            columns={
                'RELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'RELATED_PARTY_REVENUES',
                'UNRELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'UNRELATED_PARTY_REVENUES',
                'TOTAL_REVENUES_TO_HEADQUARTER_COUNTRY': 'TOTAL_REVENUES'
            },
            inplace=True
        )

        affiliate_df.rename(
            columns={
                'RELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'RELATED_PARTY_REVENUES',
                'UNRELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'UNRELATED_PARTY_REVENUES',
                'TOTAL_REVENUES_TO_AFFILIATE_COUNTRY': 'TOTAL_REVENUES'
            },
            inplace=True
        )

        already_attributed = pd.concat([headquarter_df, affiliate_df], axis=0)

        return already_attributed.copy()

    def attribute_other_country_sales(self):

        _, _, to_be_attributed = self.get_complete_revenue_split()

        df = self.trade_stat_processor.load_data_with_imputations()

        merged_df = to_be_attributed.merge(
            df,
            how='left',
            on='AFFILIATE_COUNTRY_CODE'
        )

        totals = {}
        merged_df['KEY'] = merged_df['PARENT_COUNTRY_CODE'] + merged_df['AFFILIATE_COUNTRY_CODE']

        for key in merged_df['KEY'].unique():
            restricted_df = merged_df[merged_df['KEY'] == key].copy()

            totals[key] = restricted_df[
                restricted_df['OTHER_COUNTRY_CODE'] != key[:3]
            ]['ALL_EXPORTS'].sum()

        merged_df['EXPORT_PERC'] = merged_df.apply(
            lambda row: row['ALL_EXPORTS'] / totals[
                row['PARENT_COUNTRY_CODE'] + row['AFFILIATE_COUNTRY_CODE']
            ],
            axis=1
        )

        merged_df = merged_df[merged_df['PARENT_COUNTRY_CODE'] != merged_df['OTHER_COUNTRY_CODE']].copy()

        columns_to_drop = []

        for revenue_column in ['UNRELATED_PARTY_REVENUES', 'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES']:
            column_to_drop = revenue_column + '_TO_OTHER_COUNTRY'

            columns_to_drop.append(column_to_drop)

            merged_df[revenue_column] = \
                merged_df[column_to_drop] * merged_df['EXPORT_PERC']

        columns_to_drop += ['ALL_EXPORTS', 'EXPORT_PERC', 'KEY']

        merged_df.drop(columns=columns_to_drop, inplace=True)

        merged_df.rename(
            columns={
                'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
            },
            inplace=True
        )

        return merged_df.copy()

    def get_simplified_sales_mapping_without_US(self):

        already_attributed = self.get_already_attributed_sales()

        already_attributed.drop(
            columns=[
                'AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME',
                'PARENT_COUNTRY_NAME', 'ULTIMATE_DESTINATION_NAME'
            ],
            inplace=True
        )

        merged_df = self.attribute_other_country_sales()

        merged_df.drop(
            columns=['AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME', 'PARENT_COUNTRY_NAME'],
            inplace=True
        )

        merged_df = pd.concat(
            [already_attributed, merged_df],
            axis=0
        )

        merged_df = merged_df.groupby(
            ['PARENT_COUNTRY_CODE', 'ULTIMATE_DESTINATION_CODE']
        ).sum().reset_index()

        return merged_df[merged_df['PARENT_COUNTRY_CODE'] != 'USA'].copy()

    def get_simplified_sales_mapping(self):

        sales_mapping_excl_US = self.get_simplified_sales_mapping_without_US()

        US_sales_mapping = self.US_sales_calculator.get_final_dataframe()

        US_sales_mapping.drop(
            columns=['AFFILIATE_COUNTRY_CODE', 'EXPORT_PERC', 'AFFILIATE_COUNTRY_NAME'],
            inplace=True
        )

        US_sales_mapping = US_sales_mapping.groupby('OTHER_COUNTRY_CODE').sum().reset_index()

        US_sales_mapping['PARENT_COUNTRY_CODE'] = 'USA'

        US_sales_mapping.rename(
            columns={
                'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
            },
            inplace=True
        )

        sales_mapping = pd.concat([sales_mapping_excl_US, US_sales_mapping], axis=0)

        return sales_mapping.copy()


class GlobalAnalysisProvider:

    def __init__(self, path_to_tax_deficits=path_to_tax_deficits):
        self.global_sales_calculator = GlobalSalesCalculator()
        self.sales_mapping = self.global_sales_calculator.get_simplified_sales_mapping()

        self.path_to_tax_deficits = path_to_tax_deficits

    def compute_unilateral_scenario_gains(
        self,
        taxing_country_code,
        minimum_ETR=0.25,
        return_split=False
    ):
        if minimum_ETR not in [0.15, 0.21, 0.25, 0.3]:
            raise Exception('Only the 4 benchmark minimum ETRs of the June 1st report are accepted.')

        sales_mapping = self.sales_mapping.copy()

        tax_deficits = pd.read_excel(
            self.path_to_tax_deficits,
            sheet_name=f'{int(minimum_ETR * 100)}_percent',
            engine='openpyxl'
        )

        tax_deficits = tax_deficits[tax_deficits['Parent jurisdiction (alpha-3 code)'] != '..'].copy()

        attribution_ratios = []

        for country_code in tax_deficits['Parent jurisdiction (alpha-3 code)'].unique():
            restricted_sales_mapping = sales_mapping[
                sales_mapping['PARENT_COUNTRY_CODE'] == country_code
            ].copy()

            denominator = restricted_sales_mapping['UNRELATED_PARTY_REVENUES'].sum()

            if taxing_country_code == country_code:
                attribution_ratios.append(1)

            elif taxing_country_code in restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'].unique():
                numerator = restricted_sales_mapping[
                    restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'] == taxing_country_code
                ]['UNRELATED_PARTY_REVENUES'].iloc[0]

                attribution_ratios.append(numerator / denominator)

            else:
                attribution_ratios.append(0)

        tax_deficits['ATTRIBUTION_RATIOS'] = attribution_ratios
        tax_deficits['COLLECTIBLE_TAX_DEFICIT'] = tax_deficits['tax_deficit'] * tax_deficits['ATTRIBUTION_RATIOS']

        imputation = tax_deficits[
            ~tax_deficits['Parent jurisdiction (alpha-3 code)'].isin([taxing_country_code, 'USA'])
        ]['COLLECTIBLE_TAX_DEFICIT'].sum()

        imputation_adjusted = imputation

        if taxing_country_code == 'DEU':
            imputation_adjusted /= 2

        if not return_split:
            return tax_deficits['COLLECTIBLE_TAX_DEFICIT'].sum() + imputation_adjusted

        else:
            own_tax_deficit = tax_deficits[
                tax_deficits['Parent jurisdiction (alpha-3 code)'] == taxing_country_code
            ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

            if taxing_country_code == 'USA':
                collected_from_the_US = 0

            else:
                collected_from_the_US = tax_deficits[
                    tax_deficits['Parent jurisdiction (alpha-3 code)'] == 'USA'
                ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

            collected_from_other_multinationals = imputation + imputation_adjusted

            total = own_tax_deficit + collected_from_the_US + collected_from_other_multinationals

            return own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total

    def build_new_table_3(self, output_excel=False):

        output = {
            'OWN_TAX_DEFICIT': [],
            'COLLECTIBLE_FROM_US_MNEs': [],
            'COLLECTIBLE_FROM_OTHER_MNEs': [],
            'TOTAL': []
        }

        oecd = self.global_sales_calculator.oecd.copy()

        oecd_reporting_countries = list(oecd['PARENT_COUNTRY_CODE'].unique())
        oecd_reporting_countries += ['KOR', 'NLD', 'IRL', 'FIN']

        countries_to_display = sorted(eu_27_country_codes).copy()

        for country_code in sorted(oecd_reporting_countries):
            if country_code not in countries_to_display:
                countries_to_display.append(country_code)

            else:
                continue

        output['COUNTRY_CODE'] = countries_to_display.copy()

        for country_code in countries_to_display:

            own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total = \
                self.compute_unilateral_scenario_gains(country_code, return_split=True)

            output['OWN_TAX_DEFICIT'].append(own_tax_deficit)
            output['COLLECTIBLE_FROM_US_MNEs'].append(collected_from_the_US)
            output['COLLECTIBLE_FROM_OTHER_MNEs'].append(collected_from_other_multinationals)
            output['TOTAL'].append(total)

        output_df = pd.DataFrame.from_dict(output)

        if output_excel:
            with pd.ExcelWriter('/Users/Paul-Emmanuel/Desktop/new_table_3.xlsx', engine='xlsxwriter') as writer:
                output_df.to_excel(writer, sheet_name='revised_table', index=False)

        return output_df.copy()

    def build_new_table_3_formatted(self):

        output_df = self.build_new_table_3()

        columns = ['COUNTRY_CODE'] + list(output_df.columns[:-1])

        output_df = output_df[columns].copy()

        for column in output_df.columns[1:]:
            output_df[column] /= 10**9

            output_df[column] = output_df[column].map('{:.1f}'.format)

        return output_df.copy()

Classes

class GlobalAnalysisProvider (path_to_tax_deficits='/Users/Paul-Emmanuel/Desktop/destination_based_sales/destination_based_sales/data/total_tax_deficits.xlsx')
Expand source code
class GlobalAnalysisProvider:

    def __init__(self, path_to_tax_deficits=path_to_tax_deficits):
        self.global_sales_calculator = GlobalSalesCalculator()
        self.sales_mapping = self.global_sales_calculator.get_simplified_sales_mapping()

        self.path_to_tax_deficits = path_to_tax_deficits

    def compute_unilateral_scenario_gains(
        self,
        taxing_country_code,
        minimum_ETR=0.25,
        return_split=False
    ):
        if minimum_ETR not in [0.15, 0.21, 0.25, 0.3]:
            raise Exception('Only the 4 benchmark minimum ETRs of the June 1st report are accepted.')

        sales_mapping = self.sales_mapping.copy()

        tax_deficits = pd.read_excel(
            self.path_to_tax_deficits,
            sheet_name=f'{int(minimum_ETR * 100)}_percent',
            engine='openpyxl'
        )

        tax_deficits = tax_deficits[tax_deficits['Parent jurisdiction (alpha-3 code)'] != '..'].copy()

        attribution_ratios = []

        for country_code in tax_deficits['Parent jurisdiction (alpha-3 code)'].unique():
            restricted_sales_mapping = sales_mapping[
                sales_mapping['PARENT_COUNTRY_CODE'] == country_code
            ].copy()

            denominator = restricted_sales_mapping['UNRELATED_PARTY_REVENUES'].sum()

            if taxing_country_code == country_code:
                attribution_ratios.append(1)

            elif taxing_country_code in restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'].unique():
                numerator = restricted_sales_mapping[
                    restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'] == taxing_country_code
                ]['UNRELATED_PARTY_REVENUES'].iloc[0]

                attribution_ratios.append(numerator / denominator)

            else:
                attribution_ratios.append(0)

        tax_deficits['ATTRIBUTION_RATIOS'] = attribution_ratios
        tax_deficits['COLLECTIBLE_TAX_DEFICIT'] = tax_deficits['tax_deficit'] * tax_deficits['ATTRIBUTION_RATIOS']

        imputation = tax_deficits[
            ~tax_deficits['Parent jurisdiction (alpha-3 code)'].isin([taxing_country_code, 'USA'])
        ]['COLLECTIBLE_TAX_DEFICIT'].sum()

        imputation_adjusted = imputation

        if taxing_country_code == 'DEU':
            imputation_adjusted /= 2

        if not return_split:
            return tax_deficits['COLLECTIBLE_TAX_DEFICIT'].sum() + imputation_adjusted

        else:
            own_tax_deficit = tax_deficits[
                tax_deficits['Parent jurisdiction (alpha-3 code)'] == taxing_country_code
            ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

            if taxing_country_code == 'USA':
                collected_from_the_US = 0

            else:
                collected_from_the_US = tax_deficits[
                    tax_deficits['Parent jurisdiction (alpha-3 code)'] == 'USA'
                ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

            collected_from_other_multinationals = imputation + imputation_adjusted

            total = own_tax_deficit + collected_from_the_US + collected_from_other_multinationals

            return own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total

    def build_new_table_3(self, output_excel=False):

        output = {
            'OWN_TAX_DEFICIT': [],
            'COLLECTIBLE_FROM_US_MNEs': [],
            'COLLECTIBLE_FROM_OTHER_MNEs': [],
            'TOTAL': []
        }

        oecd = self.global_sales_calculator.oecd.copy()

        oecd_reporting_countries = list(oecd['PARENT_COUNTRY_CODE'].unique())
        oecd_reporting_countries += ['KOR', 'NLD', 'IRL', 'FIN']

        countries_to_display = sorted(eu_27_country_codes).copy()

        for country_code in sorted(oecd_reporting_countries):
            if country_code not in countries_to_display:
                countries_to_display.append(country_code)

            else:
                continue

        output['COUNTRY_CODE'] = countries_to_display.copy()

        for country_code in countries_to_display:

            own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total = \
                self.compute_unilateral_scenario_gains(country_code, return_split=True)

            output['OWN_TAX_DEFICIT'].append(own_tax_deficit)
            output['COLLECTIBLE_FROM_US_MNEs'].append(collected_from_the_US)
            output['COLLECTIBLE_FROM_OTHER_MNEs'].append(collected_from_other_multinationals)
            output['TOTAL'].append(total)

        output_df = pd.DataFrame.from_dict(output)

        if output_excel:
            with pd.ExcelWriter('/Users/Paul-Emmanuel/Desktop/new_table_3.xlsx', engine='xlsxwriter') as writer:
                output_df.to_excel(writer, sheet_name='revised_table', index=False)

        return output_df.copy()

    def build_new_table_3_formatted(self):

        output_df = self.build_new_table_3()

        columns = ['COUNTRY_CODE'] + list(output_df.columns[:-1])

        output_df = output_df[columns].copy()

        for column in output_df.columns[1:]:
            output_df[column] /= 10**9

            output_df[column] = output_df[column].map('{:.1f}'.format)

        return output_df.copy()

Methods

def build_new_table_3(self, output_excel=False)
Expand source code
def build_new_table_3(self, output_excel=False):

    output = {
        'OWN_TAX_DEFICIT': [],
        'COLLECTIBLE_FROM_US_MNEs': [],
        'COLLECTIBLE_FROM_OTHER_MNEs': [],
        'TOTAL': []
    }

    oecd = self.global_sales_calculator.oecd.copy()

    oecd_reporting_countries = list(oecd['PARENT_COUNTRY_CODE'].unique())
    oecd_reporting_countries += ['KOR', 'NLD', 'IRL', 'FIN']

    countries_to_display = sorted(eu_27_country_codes).copy()

    for country_code in sorted(oecd_reporting_countries):
        if country_code not in countries_to_display:
            countries_to_display.append(country_code)

        else:
            continue

    output['COUNTRY_CODE'] = countries_to_display.copy()

    for country_code in countries_to_display:

        own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total = \
            self.compute_unilateral_scenario_gains(country_code, return_split=True)

        output['OWN_TAX_DEFICIT'].append(own_tax_deficit)
        output['COLLECTIBLE_FROM_US_MNEs'].append(collected_from_the_US)
        output['COLLECTIBLE_FROM_OTHER_MNEs'].append(collected_from_other_multinationals)
        output['TOTAL'].append(total)

    output_df = pd.DataFrame.from_dict(output)

    if output_excel:
        with pd.ExcelWriter('/Users/Paul-Emmanuel/Desktop/new_table_3.xlsx', engine='xlsxwriter') as writer:
            output_df.to_excel(writer, sheet_name='revised_table', index=False)

    return output_df.copy()
def build_new_table_3_formatted(self)
Expand source code
def build_new_table_3_formatted(self):

    output_df = self.build_new_table_3()

    columns = ['COUNTRY_CODE'] + list(output_df.columns[:-1])

    output_df = output_df[columns].copy()

    for column in output_df.columns[1:]:
        output_df[column] /= 10**9

        output_df[column] = output_df[column].map('{:.1f}'.format)

    return output_df.copy()
def compute_unilateral_scenario_gains(self, taxing_country_code, minimum_ETR=0.25, return_split=False)
Expand source code
def compute_unilateral_scenario_gains(
    self,
    taxing_country_code,
    minimum_ETR=0.25,
    return_split=False
):
    if minimum_ETR not in [0.15, 0.21, 0.25, 0.3]:
        raise Exception('Only the 4 benchmark minimum ETRs of the June 1st report are accepted.')

    sales_mapping = self.sales_mapping.copy()

    tax_deficits = pd.read_excel(
        self.path_to_tax_deficits,
        sheet_name=f'{int(minimum_ETR * 100)}_percent',
        engine='openpyxl'
    )

    tax_deficits = tax_deficits[tax_deficits['Parent jurisdiction (alpha-3 code)'] != '..'].copy()

    attribution_ratios = []

    for country_code in tax_deficits['Parent jurisdiction (alpha-3 code)'].unique():
        restricted_sales_mapping = sales_mapping[
            sales_mapping['PARENT_COUNTRY_CODE'] == country_code
        ].copy()

        denominator = restricted_sales_mapping['UNRELATED_PARTY_REVENUES'].sum()

        if taxing_country_code == country_code:
            attribution_ratios.append(1)

        elif taxing_country_code in restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'].unique():
            numerator = restricted_sales_mapping[
                restricted_sales_mapping['ULTIMATE_DESTINATION_CODE'] == taxing_country_code
            ]['UNRELATED_PARTY_REVENUES'].iloc[0]

            attribution_ratios.append(numerator / denominator)

        else:
            attribution_ratios.append(0)

    tax_deficits['ATTRIBUTION_RATIOS'] = attribution_ratios
    tax_deficits['COLLECTIBLE_TAX_DEFICIT'] = tax_deficits['tax_deficit'] * tax_deficits['ATTRIBUTION_RATIOS']

    imputation = tax_deficits[
        ~tax_deficits['Parent jurisdiction (alpha-3 code)'].isin([taxing_country_code, 'USA'])
    ]['COLLECTIBLE_TAX_DEFICIT'].sum()

    imputation_adjusted = imputation

    if taxing_country_code == 'DEU':
        imputation_adjusted /= 2

    if not return_split:
        return tax_deficits['COLLECTIBLE_TAX_DEFICIT'].sum() + imputation_adjusted

    else:
        own_tax_deficit = tax_deficits[
            tax_deficits['Parent jurisdiction (alpha-3 code)'] == taxing_country_code
        ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

        if taxing_country_code == 'USA':
            collected_from_the_US = 0

        else:
            collected_from_the_US = tax_deficits[
                tax_deficits['Parent jurisdiction (alpha-3 code)'] == 'USA'
            ]['COLLECTIBLE_TAX_DEFICIT'].iloc[0]

        collected_from_other_multinationals = imputation + imputation_adjusted

        total = own_tax_deficit + collected_from_the_US + collected_from_other_multinationals

        return own_tax_deficit, collected_from_the_US, collected_from_other_multinationals, total
class GlobalSalesCalculator (winsorize_export_percs=True)
Expand source code
class GlobalSalesCalculator:

    def __init__(self, winsorize_export_percs=True):

        self.aamne_preprocessor = AnalyticalAMNEPreprocessor()

        self.oecd_preprocessor = CbCRPreprocessor()
        self.oecd = self.oecd_preprocessor.get_preprocessed_revenue_data()

        self.trade_stat_processor = TradeStatisticsProcessor(
            year=2016,
            winsorize_export_percs=winsorize_export_percs,
            US_only=False
        )

        self.US_sales_calculator = SalesCalculator(year=2016)

        self.basic_revenue_columns = ['RELATED_PARTY_REVENUES', 'TOTAL_REVENUES', 'UNRELATED_PARTY_REVENUES'].copy()

        self.geo_columns = [
            'PARENT_COUNTRY_CODE', 'PARENT_COUNTRY_NAME', 'AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME'
        ].copy()

    def get_foreign_sales_split(self):

        foreign_aamne_data = self.aamne_preprocessor.get_extended_foreign_analytical_amne_data()

        oecd_foreign = self.oecd[
            self.oecd['PARENT_COUNTRY_CODE'] != self.oecd['AFFILIATE_COUNTRY_CODE']
        ].copy()

        merged_df_foreign = oecd_foreign.merge(
            foreign_aamne_data,
            how='left',
            on='AFFILIATE_COUNTRY_CODE'
        )

        for revenue_column in self.basic_revenue_columns:
            for perc_column in ['PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY', 'PERC_TO_OTHER_COUNTRY']:

                suffix = perc_column.replace('PERC', '')

                merged_df_foreign[revenue_column + suffix] = \
                    merged_df_foreign[revenue_column] * merged_df_foreign[perc_column]

        merged_df_foreign.drop(
            columns=[
                'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
                'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
                'PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY',
                'PERC_TO_OTHER_COUNTRY'
            ],
            inplace=True
        )

        return merged_df_foreign.copy()

    def get_domestic_sales_split(self):

        domestic_aamne_data = self.aamne_preprocessor.get_extended_domestic_analytical_amne_data()

        oecd_domestic = self.oecd[
            self.oecd['PARENT_COUNTRY_CODE'] == self.oecd['AFFILIATE_COUNTRY_CODE']
        ].copy()

        merged_df_domestic = oecd_domestic.merge(
            domestic_aamne_data,
            how='left',
            on='PARENT_COUNTRY_CODE'
        )

        for revenue_column in self.basic_revenue_columns:
            merged_df_domestic[revenue_column + '_TO_HEADQUARTER_COUNTRY'] = \
                merged_df_domestic['PERC_DOMESTIC_SALES'] * merged_df_domestic[revenue_column]

            merged_df_domestic[revenue_column + '_TO_AFFILIATE_COUNTRY'] = 0

            merged_df_domestic[revenue_column + '_TO_OTHER_COUNTRY'] = \
                merged_df_domestic['PERC_TO_OTHER_COUNTRY'] * merged_df_domestic[revenue_column]

        merged_df_domestic.drop(
            columns=[
                'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
                'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
                'PERC_DOMESTIC_SALES', 'PERC_TO_OTHER_COUNTRY'
            ],
            inplace=True
        )

        return merged_df_domestic.copy()

    def get_complete_revenue_split(self):

        merged_df_foreign = self.get_foreign_sales_split()
        merged_df_domestic = self.get_domestic_sales_split()

        complete_df = pd.concat([merged_df_domestic, merged_df_foreign], axis=0)

        headquarter_country_columns = []
        affiliate_country_columns = []
        other_country_columns = []

        for column in complete_df.columns:
            if column in self.geo_columns:
                continue

            elif 'HEADQUARTER' in column:
                headquarter_country_columns.append(column)

            elif 'AFFILIATE' in column:
                affiliate_country_columns.append(column)

            else:
                other_country_columns.append(column)

        headquarter_df = complete_df[self.geo_columns + headquarter_country_columns].copy()
        affiliate_df = complete_df[self.geo_columns + affiliate_country_columns].copy()
        other_country_df = complete_df[self.geo_columns + other_country_columns].copy()

        return headquarter_df, affiliate_df, other_country_df

    def get_already_attributed_sales(self):

        headquarter_df, affiliate_df, _ = self.get_complete_revenue_split()

        headquarter_df['ULTIMATE_DESTINATION_CODE'] = headquarter_df['PARENT_COUNTRY_CODE'].values
        headquarter_df['ULTIMATE_DESTINATION_NAME'] = headquarter_df['PARENT_COUNTRY_NAME'].values

        affiliate_df['ULTIMATE_DESTINATION_CODE'] = affiliate_df['AFFILIATE_COUNTRY_CODE'].values
        affiliate_df['ULTIMATE_DESTINATION_NAME'] = affiliate_df['AFFILIATE_COUNTRY_NAME'].values

        headquarter_df.rename(
            columns={
                'RELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'RELATED_PARTY_REVENUES',
                'UNRELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'UNRELATED_PARTY_REVENUES',
                'TOTAL_REVENUES_TO_HEADQUARTER_COUNTRY': 'TOTAL_REVENUES'
            },
            inplace=True
        )

        affiliate_df.rename(
            columns={
                'RELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'RELATED_PARTY_REVENUES',
                'UNRELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'UNRELATED_PARTY_REVENUES',
                'TOTAL_REVENUES_TO_AFFILIATE_COUNTRY': 'TOTAL_REVENUES'
            },
            inplace=True
        )

        already_attributed = pd.concat([headquarter_df, affiliate_df], axis=0)

        return already_attributed.copy()

    def attribute_other_country_sales(self):

        _, _, to_be_attributed = self.get_complete_revenue_split()

        df = self.trade_stat_processor.load_data_with_imputations()

        merged_df = to_be_attributed.merge(
            df,
            how='left',
            on='AFFILIATE_COUNTRY_CODE'
        )

        totals = {}
        merged_df['KEY'] = merged_df['PARENT_COUNTRY_CODE'] + merged_df['AFFILIATE_COUNTRY_CODE']

        for key in merged_df['KEY'].unique():
            restricted_df = merged_df[merged_df['KEY'] == key].copy()

            totals[key] = restricted_df[
                restricted_df['OTHER_COUNTRY_CODE'] != key[:3]
            ]['ALL_EXPORTS'].sum()

        merged_df['EXPORT_PERC'] = merged_df.apply(
            lambda row: row['ALL_EXPORTS'] / totals[
                row['PARENT_COUNTRY_CODE'] + row['AFFILIATE_COUNTRY_CODE']
            ],
            axis=1
        )

        merged_df = merged_df[merged_df['PARENT_COUNTRY_CODE'] != merged_df['OTHER_COUNTRY_CODE']].copy()

        columns_to_drop = []

        for revenue_column in ['UNRELATED_PARTY_REVENUES', 'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES']:
            column_to_drop = revenue_column + '_TO_OTHER_COUNTRY'

            columns_to_drop.append(column_to_drop)

            merged_df[revenue_column] = \
                merged_df[column_to_drop] * merged_df['EXPORT_PERC']

        columns_to_drop += ['ALL_EXPORTS', 'EXPORT_PERC', 'KEY']

        merged_df.drop(columns=columns_to_drop, inplace=True)

        merged_df.rename(
            columns={
                'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
            },
            inplace=True
        )

        return merged_df.copy()

    def get_simplified_sales_mapping_without_US(self):

        already_attributed = self.get_already_attributed_sales()

        already_attributed.drop(
            columns=[
                'AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME',
                'PARENT_COUNTRY_NAME', 'ULTIMATE_DESTINATION_NAME'
            ],
            inplace=True
        )

        merged_df = self.attribute_other_country_sales()

        merged_df.drop(
            columns=['AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME', 'PARENT_COUNTRY_NAME'],
            inplace=True
        )

        merged_df = pd.concat(
            [already_attributed, merged_df],
            axis=0
        )

        merged_df = merged_df.groupby(
            ['PARENT_COUNTRY_CODE', 'ULTIMATE_DESTINATION_CODE']
        ).sum().reset_index()

        return merged_df[merged_df['PARENT_COUNTRY_CODE'] != 'USA'].copy()

    def get_simplified_sales_mapping(self):

        sales_mapping_excl_US = self.get_simplified_sales_mapping_without_US()

        US_sales_mapping = self.US_sales_calculator.get_final_dataframe()

        US_sales_mapping.drop(
            columns=['AFFILIATE_COUNTRY_CODE', 'EXPORT_PERC', 'AFFILIATE_COUNTRY_NAME'],
            inplace=True
        )

        US_sales_mapping = US_sales_mapping.groupby('OTHER_COUNTRY_CODE').sum().reset_index()

        US_sales_mapping['PARENT_COUNTRY_CODE'] = 'USA'

        US_sales_mapping.rename(
            columns={
                'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
            },
            inplace=True
        )

        sales_mapping = pd.concat([sales_mapping_excl_US, US_sales_mapping], axis=0)

        return sales_mapping.copy()

Methods

def attribute_other_country_sales(self)
Expand source code
def attribute_other_country_sales(self):

    _, _, to_be_attributed = self.get_complete_revenue_split()

    df = self.trade_stat_processor.load_data_with_imputations()

    merged_df = to_be_attributed.merge(
        df,
        how='left',
        on='AFFILIATE_COUNTRY_CODE'
    )

    totals = {}
    merged_df['KEY'] = merged_df['PARENT_COUNTRY_CODE'] + merged_df['AFFILIATE_COUNTRY_CODE']

    for key in merged_df['KEY'].unique():
        restricted_df = merged_df[merged_df['KEY'] == key].copy()

        totals[key] = restricted_df[
            restricted_df['OTHER_COUNTRY_CODE'] != key[:3]
        ]['ALL_EXPORTS'].sum()

    merged_df['EXPORT_PERC'] = merged_df.apply(
        lambda row: row['ALL_EXPORTS'] / totals[
            row['PARENT_COUNTRY_CODE'] + row['AFFILIATE_COUNTRY_CODE']
        ],
        axis=1
    )

    merged_df = merged_df[merged_df['PARENT_COUNTRY_CODE'] != merged_df['OTHER_COUNTRY_CODE']].copy()

    columns_to_drop = []

    for revenue_column in ['UNRELATED_PARTY_REVENUES', 'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES']:
        column_to_drop = revenue_column + '_TO_OTHER_COUNTRY'

        columns_to_drop.append(column_to_drop)

        merged_df[revenue_column] = \
            merged_df[column_to_drop] * merged_df['EXPORT_PERC']

    columns_to_drop += ['ALL_EXPORTS', 'EXPORT_PERC', 'KEY']

    merged_df.drop(columns=columns_to_drop, inplace=True)

    merged_df.rename(
        columns={
            'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
        },
        inplace=True
    )

    return merged_df.copy()
def get_already_attributed_sales(self)
Expand source code
def get_already_attributed_sales(self):

    headquarter_df, affiliate_df, _ = self.get_complete_revenue_split()

    headquarter_df['ULTIMATE_DESTINATION_CODE'] = headquarter_df['PARENT_COUNTRY_CODE'].values
    headquarter_df['ULTIMATE_DESTINATION_NAME'] = headquarter_df['PARENT_COUNTRY_NAME'].values

    affiliate_df['ULTIMATE_DESTINATION_CODE'] = affiliate_df['AFFILIATE_COUNTRY_CODE'].values
    affiliate_df['ULTIMATE_DESTINATION_NAME'] = affiliate_df['AFFILIATE_COUNTRY_NAME'].values

    headquarter_df.rename(
        columns={
            'RELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'RELATED_PARTY_REVENUES',
            'UNRELATED_PARTY_REVENUES_TO_HEADQUARTER_COUNTRY': 'UNRELATED_PARTY_REVENUES',
            'TOTAL_REVENUES_TO_HEADQUARTER_COUNTRY': 'TOTAL_REVENUES'
        },
        inplace=True
    )

    affiliate_df.rename(
        columns={
            'RELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'RELATED_PARTY_REVENUES',
            'UNRELATED_PARTY_REVENUES_TO_AFFILIATE_COUNTRY': 'UNRELATED_PARTY_REVENUES',
            'TOTAL_REVENUES_TO_AFFILIATE_COUNTRY': 'TOTAL_REVENUES'
        },
        inplace=True
    )

    already_attributed = pd.concat([headquarter_df, affiliate_df], axis=0)

    return already_attributed.copy()
def get_complete_revenue_split(self)
Expand source code
def get_complete_revenue_split(self):

    merged_df_foreign = self.get_foreign_sales_split()
    merged_df_domestic = self.get_domestic_sales_split()

    complete_df = pd.concat([merged_df_domestic, merged_df_foreign], axis=0)

    headquarter_country_columns = []
    affiliate_country_columns = []
    other_country_columns = []

    for column in complete_df.columns:
        if column in self.geo_columns:
            continue

        elif 'HEADQUARTER' in column:
            headquarter_country_columns.append(column)

        elif 'AFFILIATE' in column:
            affiliate_country_columns.append(column)

        else:
            other_country_columns.append(column)

    headquarter_df = complete_df[self.geo_columns + headquarter_country_columns].copy()
    affiliate_df = complete_df[self.geo_columns + affiliate_country_columns].copy()
    other_country_df = complete_df[self.geo_columns + other_country_columns].copy()

    return headquarter_df, affiliate_df, other_country_df
def get_domestic_sales_split(self)
Expand source code
def get_domestic_sales_split(self):

    domestic_aamne_data = self.aamne_preprocessor.get_extended_domestic_analytical_amne_data()

    oecd_domestic = self.oecd[
        self.oecd['PARENT_COUNTRY_CODE'] == self.oecd['AFFILIATE_COUNTRY_CODE']
    ].copy()

    merged_df_domestic = oecd_domestic.merge(
        domestic_aamne_data,
        how='left',
        on='PARENT_COUNTRY_CODE'
    )

    for revenue_column in self.basic_revenue_columns:
        merged_df_domestic[revenue_column + '_TO_HEADQUARTER_COUNTRY'] = \
            merged_df_domestic['PERC_DOMESTIC_SALES'] * merged_df_domestic[revenue_column]

        merged_df_domestic[revenue_column + '_TO_AFFILIATE_COUNTRY'] = 0

        merged_df_domestic[revenue_column + '_TO_OTHER_COUNTRY'] = \
            merged_df_domestic['PERC_TO_OTHER_COUNTRY'] * merged_df_domestic[revenue_column]

    merged_df_domestic.drop(
        columns=[
            'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
            'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
            'PERC_DOMESTIC_SALES', 'PERC_TO_OTHER_COUNTRY'
        ],
        inplace=True
    )

    return merged_df_domestic.copy()
def get_foreign_sales_split(self)
Expand source code
def get_foreign_sales_split(self):

    foreign_aamne_data = self.aamne_preprocessor.get_extended_foreign_analytical_amne_data()

    oecd_foreign = self.oecd[
        self.oecd['PARENT_COUNTRY_CODE'] != self.oecd['AFFILIATE_COUNTRY_CODE']
    ].copy()

    merged_df_foreign = oecd_foreign.merge(
        foreign_aamne_data,
        how='left',
        on='AFFILIATE_COUNTRY_CODE'
    )

    for revenue_column in self.basic_revenue_columns:
        for perc_column in ['PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY', 'PERC_TO_OTHER_COUNTRY']:

            suffix = perc_column.replace('PERC', '')

            merged_df_foreign[revenue_column + suffix] = \
                merged_df_foreign[revenue_column] * merged_df_foreign[perc_column]

    merged_df_foreign.drop(
        columns=[
            'RELATED_PARTY_REVENUES', 'TOTAL_REVENUES',
            'UNRELATED_PARTY_REVENUES', 'CONTINENT_CODE',
            'PERC_TO_AFFILIATE_COUNTRY', 'PERC_TO_HEADQUARTER_COUNTRY',
            'PERC_TO_OTHER_COUNTRY'
        ],
        inplace=True
    )

    return merged_df_foreign.copy()
def get_simplified_sales_mapping(self)
Expand source code
def get_simplified_sales_mapping(self):

    sales_mapping_excl_US = self.get_simplified_sales_mapping_without_US()

    US_sales_mapping = self.US_sales_calculator.get_final_dataframe()

    US_sales_mapping.drop(
        columns=['AFFILIATE_COUNTRY_CODE', 'EXPORT_PERC', 'AFFILIATE_COUNTRY_NAME'],
        inplace=True
    )

    US_sales_mapping = US_sales_mapping.groupby('OTHER_COUNTRY_CODE').sum().reset_index()

    US_sales_mapping['PARENT_COUNTRY_CODE'] = 'USA'

    US_sales_mapping.rename(
        columns={
            'OTHER_COUNTRY_CODE': 'ULTIMATE_DESTINATION_CODE'
        },
        inplace=True
    )

    sales_mapping = pd.concat([sales_mapping_excl_US, US_sales_mapping], axis=0)

    return sales_mapping.copy()
def get_simplified_sales_mapping_without_US(self)
Expand source code
def get_simplified_sales_mapping_without_US(self):

    already_attributed = self.get_already_attributed_sales()

    already_attributed.drop(
        columns=[
            'AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME',
            'PARENT_COUNTRY_NAME', 'ULTIMATE_DESTINATION_NAME'
        ],
        inplace=True
    )

    merged_df = self.attribute_other_country_sales()

    merged_df.drop(
        columns=['AFFILIATE_COUNTRY_CODE', 'AFFILIATE_COUNTRY_NAME', 'PARENT_COUNTRY_NAME'],
        inplace=True
    )

    merged_df = pd.concat(
        [already_attributed, merged_df],
        axis=0
    )

    merged_df = merged_df.groupby(
        ['PARENT_COUNTRY_CODE', 'ULTIMATE_DESTINATION_CODE']
    ).sum().reset_index()

    return merged_df[merged_df['PARENT_COUNTRY_CODE'] != 'USA'].copy()