D8009.id.diff
No OneTemporary
Actions

Size

7 KB

Referenced Files

None

Subscribers

None

D8009.id.diff
View Options

	Index: steve.py
	===================================================================
	--- /dev/null
	+++ steve.py
	@@ -0,0 +1,173 @@
	+import csv
	+import datetime
	+import re
	+import copy
	+
	+#*******************************************************************************************************************************************
	+#This programme solves the problem of cleaning and classifying CSV files before loading them into my summary pivot table for
	+#record keeping. The pgoramme takes two raw csv files with transaction information and 1) changes dates, currencies, and spend into
	+#consistent values #2) groups by week and by merchant category 3) outputs a new transaction file which is the input for an Excel pivot table.
	+#Using Excel in the 'last mile' was decided because of the easy pivot table functionality once clean data is provided.
	+#********************************************************************************************************************
	+
	+
	+GBPUSD = 1.6
	+
	+#*******************************************************************************************************************************************
	+#load functions
	+
	+def load_csv_as_dict(file_name):
	+ '''Loads a csv as a dict.
	+ Args: filename.csv
	+ Returns: a csv as dict object (which has a bunch of dicts in it; is this efficient?'''
	+ open_file = open(file_name, 'rU',)
	+ loaded_csv_as_dict = csv.DictReader(open_file,fieldnames=None,restkey=None,restval=None,dialect='excel')
	+ return loaded_csv_as_dict
	+
	+def load_merchant_categories(merchant_category_loaded_csv):
	+ '''Loads merchant categories.
	+
	+ Args: merchant_category_file_ref, a CSV loaded as a dictionary that maps merchants to categoreis. For example:
	+ {Keys: Citibank ATM, Values: Cash}{Keys: Starbucks, Values: Food}
	+
	+ Returns: a new dict (loaded_merchant_categories) that maps keys to merchants. For example:
	+
	+ {Starbucks: Food, Citibnak ATM: Cash, ...}.'''
	+
	+ merchant_categories = {}
	+ for line in merchant_category_loaded_csv:
	+ t = []
	+ t.append(line['Key'])
	+ t.append(line['Value'])
	+ key = t[0]
	+ value = t[1]
	+ merchant_categories[key]= value
	+ t[:] = []
	+ return merchant_categories
	+
	+
	+###Transform the csv dict objects and write to CSV#####################################################################################################################
	+
	+def transform_transaction_record_mint(transaction_record, merchant_categories):
	+ '''Using raw csv object, changes dates, currencies, and spend into consistent values then groups by week and by merchant category
	+ Args: transaction_record. Type: csv object, iterable by row. each row is a dictionary where 'column' headers are the keys.
	+ merchant_categories. Type: csv object, same as above
	+
	+ returns: transformed_transaction_record. Type: list. Contains: dictionaries of with the new information'''
	+
	+ transformed_transaction_record = []
	+ for line in transaction_record:
	+ #a/ get week number and date
	+ combo_dict = {}
	+
	+ week_list = re.split('[/]',line['Date'])
	+ week_tuple = tuple(week_list)
	+ dt = datetime.date(int(week_tuple[2]), int(week_tuple[0]), int(week_tuple[1]))
	+ wk_num = dt.isocalendar()[1]
	+ combo_dict['Week_num'] = wk_num
	+ x = format(dt)
	+ combo_dict['Date'] = x
	+ combo_dict['Week_num'] = x[:4] + '_' + str(wk_num)
	+
	+ #b/ map merchants to their groups
	+ combo_dict['Merch_name'] = line['Description']
	+ category = merchant_categories.get(line['Description'],'n/a')
	+ combo_dict['Category']= category
	+
	+ #c/ get the correct float val, adjusting for deb or cred
	+ float_val = line['Amount']
	+ try:
	+ spend_amt = float(float_val)
	+ except:
	+ spend_amt = float(float_val.replace(",",""))
	+ if line['Transaction Type'] == 'debit':
	+ spend_amt = spend_amt*-1
	+
	+ combo_dict['Amount'] = round(spend_amt/GBPUSD)
	+ combo_dict['Account_name'] = line['Account Name']
	+
	+ #d/ note that the spend is from carolyn
	+ combo_dict['Account_owner']='CS'
	+ transformed_transaction_record.append(combo_dict)
	+ return transformed_transaction_record
	+
	+def transform_transaction_record_citi(transaction_record, merchant_categories):
	+ ''' Same as for transform_transaction_record_mint, but adjusted for the citi transaction_record'''
	+ transformed_transaction_record = []
	+ for line in transaction_record:
	+ combo_dict = {}
	+
	+ #a/ get week number and date
	+ week_list = re.split('[/]',line['Date'])
	+ week_tuple = tuple(week_list)
	+ dt = datetime.date(int(week_tuple[2]), int(week_tuple[1]), int(week_tuple[0]))
	+ wk_num = dt.isocalendar()[1]
	+
	+ x = format(dt)
	+ combo_dict['Date'] = x
	+ combo_dict['Week_num'] = x[:4] + '_' + str(wk_num)
	+
	+ #b/ clean merch record so that it can be looked-up in merchant categories
	+ merchant_fullname = line['Trans'] #returns string
	+ if merchant_fullname.find('TRANS') != -1:
	+ combo_dict['Merch_name'] = merchant_fullname[:34] #concatenate string so that it can be looked-up
	+ elif merchant_fullname.find('LNK') != -1:
	+ combo_dict['Merch_name'] = merchant_fullname[:17]
	+ elif merchant_fullname.find('OUTGOING') !=-1:
	+ combo_dict['Merch_name'] = merchant_fullname[:8]
	+ else:
	+ combo_dict['Merch_name'] = merchant_fullname
	+
	+ category = merchant_categories.get(combo_dict['Merch_name'],'n/a')
	+ combo_dict['Category']= category
	+
	+ combo_dict['Account_name'] = 'Citi_UK'
	+
	+ #c/ get the correct float val, adjusting for deb or cred
	+ float_val = line['Value']
	+ try:
	+ spend_amt = float(float_val)
	+ except:
	+ spend_amt = float(float_val.replace(",",""))
	+ combo_dict['Amount'] = spend_amt
	+
	+ #d/ note where transaction is from
	+ combo_dict['Account_owner']='SB'
	+ transformed_transaction_record.append(combo_dict)
	+ return transformed_transaction_record
	+
	+def write_to_csv(filename, list_of_dicts_1, list_of_dicts_2):
	+ '''Writes a transaction_summary to a csv file. Creates csv.DictWriter object, which uses the fieldnames variable. Fieldnames = column
	+ headers in the output csv. Extrasection must be 'ignore', otherwise will not work.
	+
	+ Args:
	+
	+ filename: Type: string. For example: file.csv
	+
	+ list of dicts: type: list. Contains cleaned dictionaries
	+
	+ Returns: None. Output file written directly to CSV
	+ '''
	+ for item in list_of_dicts_2:
	+ list_of_dicts_1.append(item)
	+
	+ fieldnames = ['Date', 'Merch_name', 'Category', 'Week_num', 'Account_name', 'Amount','Account_owner']
	+
	+ with open(filename, 'wb') as csvfile:
	+ D_obj = csv.DictWriter(csvfile, delimiter=',',fieldnames=fieldnames, restval='', extrasaction='ignore', dialect='excel')
	+ D_obj.writeheader()
	+ for each_dict in list_of_dicts_1:
	+ D_obj.writerow(each_dict)
	+
	+###############################################################################################################################################
	+#Run programme
	+
	+merchant_category_loaded_csv = load_csv_as_dict('merch_dict.csv')
	+merchant_categories = load_merchant_categories(merchant_category_loaded_csv)
	+
	+transaction_record_1 = load_csv_as_dict('mint_file.csv')
	+transformed_transaction_record_1 = transform_transaction_record_mint(transaction_record_1, merchant_categories)
	+
	+transaction_record_2 = load_csv_as_dict('citi_file.csv')
	+transformed_transaction_record_2 = transform_transaction_record_citi(transaction_record_2, merchant_categories)
	+write_to_csv('output_file_20140114_1.csv', transformed_transaction_record_1, transformed_transaction_record_2)

File Metadata

Mime Type: text/plain
Expires: Tue, Oct 15, 9:29 PM (3 w, 2 d ago)
Storage Engine: blob
Storage Format: Encrypted (AES-256-CBC)
Storage Handle: 6713931
Default Alt Text: D8009.id.diff (7 KB)

D8009.id.diffNo OneTemporaryActions

D8009.id.diffView Options

File Metadata

Event Timeline

D8009.id.diff
No OneTemporary
Actions

D8009.id.diff
View Options