# -*- coding: utf-8 -*-
#The MIT License (MIT)
#
#Copyright (c) 2014 Lukasz Mentel
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.
'''
Module with convenience functions for parsing files
'''
from __future__ import print_function
import itertools
from collections import defaultdict
def contains(string, query):
    '''
    Check if `string` contains `query`.

    Args:
      string : str
        Text to search in
      query : str
        Substring to look for

    Returns:
      out : bool
        True if `query` occurs anywhere in `string`, False otherwise
    '''

    # the membership operator is the idiomatic spelling of `find(...) > -1`
    return query in string
def locatelinenos(filename, tolocate):
    '''
    Given a file and a list of (query, offset) pairs return a dict with the
    indices of the pairs as keys and the line numbers in which the queries
    appear as values.

    Args:
      filename : str
        Name of the file
      tolocate : list of tuples
        List of tuples with strings to find (queries) as first elements and
        integer offset values as second

    Returns:
      out : defaultdict(list)
        Dictionary whose keys are indices corresponding to items in the input
        list and values are lists of line numbers in which those strings
        appear. Line numbers are counted from zero and have the offset of
        the matching item added. Queries that never match get no key.

    TODO:
      - add option to ignore the case of the strings to search
    '''

    out = defaultdict(list)
    # the context manager closes the file even when the scan raises
    with open(filename, 'r') as fobj:
        for lineno, line in enumerate(fobj):
            for idx, (query, offset) in enumerate(tolocate):
                if query in line:
                    out[idx].append(lineno + offset)
    return out
def getlines(filename, tolocate):
    '''
    Return the lines from the file based on `tolocate`.

    Every query in `tolocate` has to be found in exactly one line of the
    file. The smallest and the largest of the located line numbers (offsets
    included) are passed to `getchunk` as `startlno` and `endlno`.

    Args:
      filename : str
        Name of the file
      tolocate : list of tuples
        List of tuples with strings to find (queries) as first elements and
        integer offset values as second

    Returns:
      lines : list
        The result of `getchunk` for the located range of line numbers

    Raises:
      ValueError
        If `tolocate` is empty (checked before the file is opened), if a
        query is not found in the file or if a query is found in more than
        one line
    '''

    # without queries there is no range to extract; fail with a clear message
    # instead of the opaque error min() gives for an empty sequence
    if not tolocate:
        raise ValueError('tolocate is empty: no queries to locate')

    located = locatelinenos(filename, tolocate)

    if len(tolocate) != len(located):
        # name the queries that were not found, not only the counts
        missing = ['"{0}"'.format(query)
                   for idx, (query, _) in enumerate(tolocate)
                   if idx not in located]
        raise ValueError(
            'len(tolocate) != len(located): {0} != {1}; not found: {2}'.format(
                len(tolocate), len(located), ', '.join(missing)))

    for idx, linenos in located.items():
        if len(linenos) > 1:
            raise ValueError('multiple lines found for "{0}": {1}'.format(
                tolocate[idx][0], ', '.join(str(x) for x in linenos)))

    # flatten the per-query lists once and reuse them for both bounds
    alllinenos = list(itertools.chain.from_iterable(located.values()))
    return getchunk(filename, min(alllinenos), max(alllinenos))
def getchunk(filename, startlno, endlno):
    '''
    Get a list of lines from a file between specified line numbers `startlno`
    and `endlno`.

    Line numbers are counted from zero. The line `startlno` is included and
    the line `endlno` is not, so `endlno - startlno` lines are returned.

    Args:
      filename : str
        Name of the file to process
      startlno : int
        Number of the first line to obtain
      endlno : int
        Number of the line at which to stop (this line is not returned)

    Returns:
      lines : list
        A list of lines (line endings included) from the file `filename`
        between line numbers `startlno` and `endlno`

    Raises:
      StopIteration
        If the file ends before all the requested lines have been read
    '''

    # the context manager closes the file on return and on error alike
    with open(filename, 'r') as fobj:
        fileiter = iter(fobj)
        # skip the lines in front of the requested chunk
        for _ in range(startlno):
            next(fileiter)
        return [next(fileiter) for _ in range(endlno - startlno)]
def take(seq, num):
    '''
    Return a list with the next `num` elements of `seq`.

    `seq` can be any iterable. An iterator is advanced by `num` elements, so
    consecutive calls on the same iterator return consecutive chunks. Other
    iterables (lists, tuples, strings) are read from their beginning.

    Args:
      seq : iterable
        Source of the elements
      num : int
        Number of elements to obtain

    Returns:
      out : list
        The `num` elements iterated over

    Raises:
      StopIteration
        If `seq` runs out before `num` elements have been obtained
    '''

    # iter() returns an iterator unchanged and makes plain sequences usable
    iterator = iter(seq)
    return [next(iterator) for _ in range(num)]
def parsepairs(los, sep="="):
    '''
    Convert a list of strings "los" with "name <sep> value" entries into a
    dictionary and return it.

    Strings that do not contain "sep" are skipped. The part in front of "sep"
    is stripped of surrounding whitespace and used as the key, the part
    behind it is converted to float and used as the value.
    '''

    pairs = {}
    for entry in los:
        # entries without the separator carry no pair
        if sep not in entry:
            continue
        key, number = entry.split(sep)
        pairs[key.strip()] = float(number)
    return pairs
def sliceafter(seq, item, num):
    '''
    Find the first element of "seq" that contains "item" and return a list
    with the "num" elements of "seq" that follow it.

    None is returned when no element of "seq" contains "item".
    '''

    stream = iter(seq)
    for candidate in stream:
        if item not in candidate:
            continue
        # the iterator now sits right behind the matching element
        following = []
        for _ in range(num):
            following.append(next(stream))
        return following
    return None
def slicebetween(string, start, end):
    '''
    Return a slice of the `string` between phrases `start` and `end`.

    The first occurrence of `start` is used together with the first
    occurrence of `end` that begins after `start` has finished.

    Args:
      string : str
        Text to slice
      start : str
        Phrase in front of the slice
      end : str
        Phrase behind the slice

    Returns:
      out : str
        The part of `string` enclosed by `start` and `end`, without the
        phrases themselves

    Raises:
      ValueError
        If `start` is not found in `string` or `end` is not found behind it
    '''

    # index of the first character behind the `start` phrase
    ibegin = string.index(start) + len(start)
    # search for `end` only behind `start`; otherwise an `end` that occurs
    # inside `start` (e.g. identical delimiters) would match too early
    iend = string.index(end, ibegin)
    return string[ibegin:iend]