# -*- coding: utf-8 -*-
import sys, os
import re, pprint
import ConfigParser, optparse
import csv
def triplify(row):
    """Write one (key, property, value) output row per non-empty cell of `row`.

    `row` is a mapping from column name to cell text, as produced by
    `csv.DictReader`.  The function relies on module-level state prepared by
    the script body:

    * `keyfield` -- name of the column whose value identifies the subject
    * `props`    -- names of the columns to emit as properties
    * `splitby`  -- compiled regex used to split multi-value cells, or a
                    false value to emit each cell unsplit
    * `writer`   -- object with a `writerow(dict)` method
    * `outfilds` -- the three output column names, in order

    Raises KeyError if `keyfield` or any name in `props` is missing from
    `row`.
    """
    key = row[keyfield]
    for p in props:
        v = row[p]
        # Empty or missing cells produce no triple.
        if not v:
            continue
        if splitby:
            for part in splitby.split(v):
                part = part.strip()
                # A trailing or doubled separator yields empty fragments;
                # skip them, consistent with the empty-cell rule above.
                if part:
                    writer.writerow(dict(zip(outfilds, [key, p, part])))
        else:
            writer.writerow(dict(zip(outfilds, [key, p, v])))
# Command-line driver.
#
# Usage: [options] [INPUT [PROPERTY ...]]
# Reads a tab-separated table with a header row from INPUT (or stdin when no
# positional argument is given) and writes tab-separated
# (key, "property", "value") rows to stdout via triplify().
parser = optparse.OptionParser()
parser.add_option("-s", "--split", dest="splitby", default=None,
                  help="Split multi-value fields at the given character(s)")
parser.add_option("-k", "--key", dest="keyfield", default=None,
                  help="Use this field as unique key (subject id)")
(options, args) = parser.parse_args()

splitby = options.splitby
keyfield = options.keyfield

if splitby:
    # The option text is placed verbatim inside a regex character class, so
    # each character acts as a separator and `re` class syntax (escapes,
    # ranges) is interpreted as such.
    splitby = re.compile("[%s]" % splitby)

# First positional argument names the input file; otherwise read stdin.
if len(args) > 0:
    csvin = open(args[0])
else:
    csvin = sys.stdin

try:
    reader = csv.DictReader(csvin, delimiter='\t', quotechar='\\')
    csvout = sys.stdout

    # Column names come from the header row of the input.
    fields = reader.fieldnames
    if not fields:
        parser.error("input has no header row")

    # Remaining positional arguments select the property columns.
    props = args[1:]

    if not keyfield:
        # Default key: the first column.
        keyfield = fields[0]
    elif keyfield not in fields:
        parser.error("key field %r not found in input header" % keyfield)

    if len(props) == 0:
        # Default properties: every column except the key column.
        i = fields.index(keyfield)
        props = fields[:i] + fields[i+1:]

    outfilds = [keyfield, "property", "value"]
    writer = csv.DictWriter(csvout, outfilds, delimiter='\t', quotechar='\\')

    # Header row: each output column maps to its own name.
    writer.writerow(dict(zip(outfilds, outfilds)))

    for row in reader:
        triplify(row)
finally:
    # Only close what this script opened; stdin and stdout are left alone.
    if csvin is not sys.stdin:
        csvin.close()
# triplify.py
# application/x-python, 1410 bytes (load raw)
