Search code examples
pythoncsvbcp

Export a BCP file into CSV format


I would like to extract data from Data Charity Register Extract and export it as an Excel file (.csv).

This person published in Github his code to do so by using an import.py file

Here is how I did: in two ways:

The first way:

  1. I download file: RegPlusExtract_November_2015.zip from the link above and put it in C:\Python27 (where I also install Python)

  2. I open the code file below (import.py) and the one after (bcp.py) in IDLE and run import.py (using F5). I put these two .py files in C:\Python27

#!/usr/bin/env python
import bcp
import zipfile
import sys

cc_files = {
    "extract_acct_submit": [
      "regno",
      "submit_date",
      "arno",
      "fyend"
    ], 
    "extract_aoo_ref": [
      "aootype",
      "aookey",
      "aooname",
      "aoosort",
      "welsh",
      "master",
      "code"
    ], 
    "extract_ar_submit": [
      "regno",
      "arno",
      "submit_date"
    ], 
    "extract_charity": [
      "regno",
      "subno",
      "name",
      "orgtype",
      "gd",
      "aob",
      "aob_defined",
      "nhs",
      "ha_no",
      "corr",
      "add1",
      "add2",
      "add3",
      "add4",
      "add5",
      "postcode",
      "phone",
      "fax",
    ], 
    "extract_charity_aoo": [
      "regno",
      "aootype",
      "aookey",
      "welsh",
      "master"
    ], 
    "extract_class": [
      "regno",
      "class"
    ], 
    "extract_class_ref": [
      "classno",
      "classtext",
    ], 
    "extract_financial": [
      "regno",
      "fystart",
      "fyend",
      "income",
      "expend"
    ], 
    "extract_main_charity": [
      "regno",
      "coyno",
      "trustees",
      "fyend",
      "welsh",
      "incomedate",
      "income",
      "grouptype",
      "email",
      "web"
    ], 
    "extract_name": [
      "regno",
      "subno",
      "nameno",
      "name"
    ], 
    "extract_objects": [
      "regno",
      "subno",
      "seqno",
      "object"
    ], 
    "extract_partb": [
      "regno",
      "artype",
      "fystart",
      "fyend",
      "inc_leg",
      "inc_end",
      "inc_vol",
      "inc_fr",
      "inc_char",
      "inc_invest",
      "inc_other",
      "inc_total",
      "invest_gain",
      "asset_gain",
      "pension_gain",
      "exp_vol",
      "exp_trade",
      "exp_invest",
      "exp_grant",
      "exp_charble",
      "exp_gov",
      "exp_other",
      "exp_total",
      "exp_support",
      "exp_dep",
      "reserves",
      "asset_open",
      "asset_close",
      "fixed_assets",
      "open_assets",
      "invest_assets",
      "cash_assets",
      "current_assets",
      "credit_1",
      "credit_long",
      "pension_assets",
      "total_assets",
      "funds_end",
      "funds_restrict",
      "funds_unrestrict",
      "funds_total",
      "employees",
      "volunteers",
      "cons_acc",
      "charity_acc"
    ], 
    "extract_registration": [
      "regno",
      "subno",
      "regdate",
      "remdate",
      "remcode"
    ], 
    "extract_remove_ref": [
      "code",
      "text"
    ], 
    "extract_trustee": [
      "regno",
      "trustee"
    ]
}

def import_zip(zip_file):
    zf = zipfile.ZipFile(zip_file, 'r')
    print 'Opened zip file: %s' % zip_file
    for filename in cc_files:
        try:
            bcp_filename = filename + '.bcp'
            csv_filename = filename + '.csv'
            bcpdata = zf.read(bcp_filename)
            bcp.convert(bcpdata, csvfilename=csv_filename, col_headers=cc_files[filename])
            print 'Converted: %s' % bcp_filename
        except KeyError:
            print 'ERROR: Did not find %s in zip file' % bcp_filename

def main():
    zip_file = sys.argv[1]
    import_zip(zip_file)

if __name__ == '__main__':
    main()

#!/usr/bin/env python
import sys
import csv

def convert(bcpdata, csvfilename="", lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', col_headers=None, escapechar='\\', newline='\n'):
    bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
    bcpdata = bcpdata.replace(quote, escapechar + quote)
    bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
    bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
    if csvfilename=="":
        csvfilename = 'converted.csv'
    with open(csvfilename, 'wb') as csvfile:
        if(col_headers):
            writer = csv.writer(csvfile)
            writer.writerow(col_headers)
        csvfile.write('"')
        csvfile.write(bcpdata)
        csvfile.write('"')

def main():
    bcp_filename = sys.argv[1]
    try:
        csv_filename = sys.argv[2]
    except IndexError:
        csv_filename = bcp_filename.replace('.bcp', '.csv')
    with open(bcp_filename, 'rb') as bcpfile:
        bcpdata = bcpfile.read()
        convert(bcpdata, csv_filename)

if __name__ == '__main__':
    main()

It struck me with this error:

>>> ================================ RESTART ================================
>>> 

Traceback (most recent call last):
  File "C:\Python27\bcp.py", line 31, in <module>
    main()
  File "C:\Python27\bcp.py", line 21, in main
    bcp_filename = sys.argv[1]
IndexError: list index out of range
>>> ================================ RESTART ================================
>>> 

Traceback (most recent call last):
  File "C:\Python27\import.py", line 175, in <module>
    main()
  File "C:\Python27\import.py", line 171, in main
    zip_file = sys.argv[1]
IndexError: list index out of range
>>> 

Can anyone please point out where it's gone wrong?

Second way:

Then I try to use Command Prompt in Windows to run the file: First I set path to where I save all files (C:\python27) Then I run in command prompt

python import RegPlusExtract_November_2015.zip

It struck me with: error:

 File"<stdin>", line 1

python import RegPlusExtract_November_2015.zip

Please can anyone point out where I get it wrong or show me how to extract csv file from the data link above.


Solution

  • When you run a script from IDLE, you cannot pass parameters in sys.argv. So in that use case the error is normal. But after the errors have shown up on F5, you should be able to directly call:

    zip_file = 'RegPlusExtract_November_2015.zip'
    import_zip(zip_file)
    

    it should allow you to process your data.

    For second way in command prompt, you must give the exact name of script file. The command should be:

    python import.py RegPlusExtract_November_2015.zip
    

    But anyway, it is bad to put your own scripts and other data files in Python directory. C:\Python27 should only contain files from the initial Python distribution, and other general utilities, not you local scripts. The usual way is to add c:\Python to your PATH environment, and use a dedicated directory for your Data Charity processing