
且构网 - 分享程序员编程开发的那些事


更新时间:2023-02-04 19:28:29


Just use the csv module to read the data, adjust each row as needed, and write it out again.


You can create 'empty' columns by using None or an empty string '' as the value for that column. Conversely, reading an empty column (i.e. nothing between two consecutive tabs) gives you an empty string.

import csv

# Copy the tab-delimited input.csv to output.csv, normalising every row to
# exactly five columns: insert a missing `c` column, pad short rows with
# empty strings, and fold any excess columns into the fifth one.
with open('input.csv', newline='') as infile, open('output.csv', 'w', newline='') as outfile:
    reader = csv.reader(infile, delimiter='\t')
    writer = csv.writer(outfile, delimiter='\t')

    for row in reader:
        if len(row) > 3:
            # detect if `c` is missing (insert your own test here)
            # sample test looks for 3 consecutive columns with values f, o and o
            if row[3:6] == ['f', 'o', 'o']:
                # insert an empty `c`
                row.insert(3, '')

        if len(row) < 5:
            # make row at least 5 columns long
            row.extend([''] * (5 - len(row)))
        if len(row) > 5:
            # merge any excess columns into the 5th column
            row[4] = ','.join(row[4:])
            del row[5:]

        # emit the normalised row; without this the output file stays empty
        writer.writerow(row)




Instead of using a flag, use the reader as an iterator (calling next() on it to get the next row instead of using a for loop):

import csv

# Merge multi-line records from the tab-delimited input.csv into single
# five-column rows in output.csv.  A record starts on a line whose first
# column has a value; following lines with an empty first column are
# continuation lines whose second column is appended to the record.
with open('input.csv', newline='') as infile, open('output.csv', 'w', newline='') as outfile:
    reader = csv.reader(infile, delimiter='\t')
    writer = csv.writer(outfile, delimiter='\t')

    # the record currently being assembled; None when nothing is pending
    row = None

    # next() raises StopIteration once the reader is exhausted; a single
    # try around the whole loop turns that into the normal exit path
    try:
        next(reader)  # skip the `A   B` headers.

        line = next(reader)  # prime our loop
        while True:
            while not line or not line[0]:
                # advance to the first line with a column 0 value
                # (blank lines come back as [] and are skipped as well)
                line = next(reader)

            row = line  # start off with the first number and column
            line = next(reader)  # prime the subsequent lines loop

            while line and not line[0]:
                # process subsequent lines until we find one with a value in col 0 again
                cell = line[1]
                if cell == 'foo':    # detect column d
                    row.append('')   # and insert empty value
                # add the continuation value to the record being built
                row.append(cell)
                line = next(reader)

            # consolidate, write
            if len(row) < 5:
                # make row at least 5 columns long
                row.extend([''] * (5 - len(row)))
            if len(row) > 5:
                # merge any excess columns into the 5th column
                row[4] = ','.join(row[4:])
                del row[5:]

            writer.writerow(row)
            row = None
    except StopIteration:
        # reader is done, no more lines to come
        # process the last row if there was one
        if row is not None:
            # consolidate, write
            if len(row) < 5:
                # make row at least 5 columns long
                row.extend([''] * (5 - len(row)))
            if len(row) > 5:
                # merge any excess columns into the 5th column
                row[4] = ','.join(row[4:])
                del row[5:]

            writer.writerow(row)
