Page MenuHomePhabricator

D8009.id.diff
No OneTemporary

D8009.id.diff

Index: steve.py
===================================================================
--- /dev/null
+++ steve.py
@@ -0,0 +1,173 @@
+import csv
+import datetime
+import re
+import copy
+
+#*******************************************************************************************************************************************
+#This programme solves the problem of cleaning and classifying CSV files before loading them into my summary pivot table for
+#record keeping. The pgoramme takes two raw csv files with transaction information and 1) changes dates, currencies, and spend into
+#consistent values #2) groups by week and by merchant category 3) outputs a new transaction file which is the input for an Excel pivot table.
+#Using Excel in the 'last mile' was decided because of the easy pivot table functionality once clean data is provided.
+#********************************************************************************************************************
+
+
+GBPUSD = 1.6
+
+#*******************************************************************************************************************************************
+#load functions
+
+def load_csv_as_dict(file_name):
+ '''Loads a csv as a dict.
+ Args: filename.csv
+ Returns: a csv as dict object (which has a bunch of dicts in it; is this efficient?'''
+ open_file = open(file_name, 'rU',)
+ loaded_csv_as_dict = csv.DictReader(open_file,fieldnames=None,restkey=None,restval=None,dialect='excel')
+ return loaded_csv_as_dict
+
+def load_merchant_categories(merchant_category_loaded_csv):
+ '''Loads merchant categories.
+
+ Args: merchant_category_file_ref, a CSV loaded as a dictionary that maps merchants to categoreis. For example:
+ {Keys: Citibank ATM, Values: Cash}{Keys: Starbucks, Values: Food}
+
+ Returns: a new dict (loaded_merchant_categories) that maps keys to merchants. For example:
+
+ {Starbucks: Food, Citibnak ATM: Cash, ...}.'''
+
+ merchant_categories = {}
+ for line in merchant_category_loaded_csv:
+ t = []
+ t.append(line['Key'])
+ t.append(line['Value'])
+ key = t[0]
+ value = t[1]
+ merchant_categories[key]= value
+ t[:] = []
+ return merchant_categories
+
+
+###Transform the csv dict objects and write to CSV#####################################################################################################################
+
+def transform_transaction_record_mint(transaction_record, merchant_categories):
+ '''Using raw csv object, changes dates, currencies, and spend into consistent values then groups by week and by merchant category
+ Args: transaction_record. Type: csv object, iterable by row. each row is a dictionary where 'column' headers are the keys.
+ merchant_categories. Type: csv object, same as above
+
+ returns: transformed_transaction_record. Type: list. Contains: dictionaries of with the new information'''
+
+ transformed_transaction_record = []
+ for line in transaction_record:
+ #a/ get week number and date
+ combo_dict = {}
+
+ week_list = re.split('[/]',line['Date'])
+ week_tuple = tuple(week_list)
+ dt = datetime.date(int(week_tuple[2]), int(week_tuple[0]), int(week_tuple[1]))
+ wk_num = dt.isocalendar()[1]
+ combo_dict['Week_num'] = wk_num
+ x = format(dt)
+ combo_dict['Date'] = x
+ combo_dict['Week_num'] = x[:4] + '_' + str(wk_num)
+
+ #b/ map merchants to their groups
+ combo_dict['Merch_name'] = line['Description']
+ category = merchant_categories.get(line['Description'],'n/a')
+ combo_dict['Category']= category
+
+ #c/ get the correct float val, adjusting for deb or cred
+ float_val = line['Amount']
+ try:
+ spend_amt = float(float_val)
+ except:
+ spend_amt = float(float_val.replace(",",""))
+ if line['Transaction Type'] == 'debit':
+ spend_amt = spend_amt*-1
+
+ combo_dict['Amount'] = round(spend_amt/GBPUSD)
+ combo_dict['Account_name'] = line['Account Name']
+
+ #d/ note that the spend is from carolyn
+ combo_dict['Account_owner']='CS'
+ transformed_transaction_record.append(combo_dict)
+ return transformed_transaction_record
+
+def transform_transaction_record_citi(transaction_record, merchant_categories):
+ ''' Same as for transform_transaction_record_mint, but adjusted for the citi transaction_record'''
+ transformed_transaction_record = []
+ for line in transaction_record:
+ combo_dict = {}
+
+ #a/ get week number and date
+ week_list = re.split('[/]',line['Date'])
+ week_tuple = tuple(week_list)
+ dt = datetime.date(int(week_tuple[2]), int(week_tuple[1]), int(week_tuple[0]))
+ wk_num = dt.isocalendar()[1]
+
+ x = format(dt)
+ combo_dict['Date'] = x
+ combo_dict['Week_num'] = x[:4] + '_' + str(wk_num)
+
+ #b/ clean merch record so that it can be looked-up in merchant categories
+ merchant_fullname = line['Trans'] #returns string
+ if merchant_fullname.find('TRANS') != -1:
+ combo_dict['Merch_name'] = merchant_fullname[:34] #concatenate string so that it can be looked-up
+ elif merchant_fullname.find('LNK') != -1:
+ combo_dict['Merch_name'] = merchant_fullname[:17]
+ elif merchant_fullname.find('OUTGOING') !=-1:
+ combo_dict['Merch_name'] = merchant_fullname[:8]
+ else:
+ combo_dict['Merch_name'] = merchant_fullname
+
+ category = merchant_categories.get(combo_dict['Merch_name'],'n/a')
+ combo_dict['Category']= category
+
+ combo_dict['Account_name'] = 'Citi_UK'
+
+ #c/ get the correct float val, adjusting for deb or cred
+ float_val = line['Value']
+ try:
+ spend_amt = float(float_val)
+ except:
+ spend_amt = float(float_val.replace(",",""))
+ combo_dict['Amount'] = spend_amt
+
+ #d/ note where transaction is from
+ combo_dict['Account_owner']='SB'
+ transformed_transaction_record.append(combo_dict)
+ return transformed_transaction_record
+
+def write_to_csv(filename, list_of_dicts_1, list_of_dicts_2):
+ '''Writes a transaction_summary to a csv file. Creates csv.DictWriter object, which uses the fieldnames variable. Fieldnames = column
+ headers in the output csv. Extrasection must be 'ignore', otherwise will not work.
+
+ Args:
+
+ filename: Type: string. For example: file.csv
+
+ list of dicts: type: list. Contains cleaned dictionaries
+
+ Returns: None. Output file written directly to CSV
+ '''
+ for item in list_of_dicts_2:
+ list_of_dicts_1.append(item)
+
+ fieldnames = ['Date', 'Merch_name', 'Category', 'Week_num', 'Account_name', 'Amount','Account_owner']
+
+ with open(filename, 'wb') as csvfile:
+ D_obj = csv.DictWriter(csvfile, delimiter=',',fieldnames=fieldnames, restval='', extrasaction='ignore', dialect='excel')
+ D_obj.writeheader()
+ for each_dict in list_of_dicts_1:
+ D_obj.writerow(each_dict)
+
+###############################################################################################################################################
+#Run programme
+
+merchant_category_loaded_csv = load_csv_as_dict('merch_dict.csv')
+merchant_categories = load_merchant_categories(merchant_category_loaded_csv)
+
+transaction_record_1 = load_csv_as_dict('mint_file.csv')
+transformed_transaction_record_1 = transform_transaction_record_mint(transaction_record_1, merchant_categories)
+
+transaction_record_2 = load_csv_as_dict('citi_file.csv')
+transformed_transaction_record_2 = transform_transaction_record_citi(transaction_record_2, merchant_categories)
+write_to_csv('output_file_20140114_1.csv', transformed_transaction_record_1, transformed_transaction_record_2)

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 15, 9:29 PM (3 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6713931
Default Alt Text
D8009.id.diff (7 KB)

Event Timeline