Всем привет. У меня есть текстовый файл размером 3 ГБ, полученный через API. Внутри него находятся структуры JSON вперемешку с обычным текстом. Как самым быстрым способом прочитать его и преобразовать в DataFrame, чтобы каждому ключу соответствовал столбец, а значениям — строки?
4785713 ^ ^ TRAVONE ^ ^ BOM ^ ^ VTZ ^ ^ \N ^ ^ \N ^ ^ \N ^ ^ \N ^ ^ \N ^ ^
{
"origin": "BOM",
"originCity": "Mumbai",
"destination": "VTZ",
"destinationCity": "Vishakhapatnam",
"triptype": "ONEWAY",
"cabinClass": "E",
"adults": "1",
"childs": "1",
"infants": "0",
"excludeLCCSearch": "",
"isNearByAirportSearch": "",
"isNonStopSearch": "",
"onwarddt": "2021-04-07",
"returndt": "",
"orgcc": "IN",
"descc": "IN",
"traveltype": "domestic",
"org": "Mumbai (BOM)",
"dest": "Vishakhapatnam (VTZ)",
"token": "",
"currency": "INR",
"flighttype": "D",
"tier": "",
"jpnumber": "",
"sessionId": "92d08db8-9b32-4c59-9160-6cd965b4edbb",
"platform": "WEB",
"SID": "424f4d56545a313130323032312d30342d30374439326430386462382d396233322d346335392d393136302d36636439363562346564626245",
"requestSource": "UBA",
"fareindicator": ["REGR", "FLEX"],
"tpmvalue": 686
} ^ ^ [
{
"onward_resp": [
{
"supplier": "EaseMyTrip",
"far": {
"tbf": 7128,
"bwm": 8462,
"twm": 1334,
"fbr": [{
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 3000,
"csh": 0,
"chp": 2500,
"ref": true,
"commission": 18,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": "RCIP",
"fke": "R",
"fin": "0~R~ ~6E~RCIP~1070~~0~45~~X",
"zc": false,
"mk": 0,
"ty": 0,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4231,
"tt": 667,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 3564,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 667,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}, {
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 3000,
"csh": 0,
"chp": 2500,
"ref": true,
"commission": 18,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": null,
"fke": null,
"fin": null,
"zc": false,
"mk": 0,
"ty": 1,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4231,
"tt": 667,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 3564,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 667,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}],
"bwm1": 8462,
"twm1": 1334,
"tbf1": 3564,
"tef": 8462
},
"leg": [{
"ori": "BOM",
"des": "BLR",
"adt": "Wed-07Apr2021",
"ddt": "Wed-07Apr2021",
"ati": "10:15",
"dti": "08:30",
"bcl": "R",
"ccl": "R",
"crr": "6E",
"crn": "Indigo",
"fnum": "5338",
"jdu": "05h 40m",
"oni": "10:15",
"flt": "1h 45m",
"ssk": "Non - Stop",
"fty": "Economy",
"rbdpnt": "0",
"cobrand_earn_flag": "false",
"airlineName": "6E",
"fkey": "0$0$39",
"aircraftLayoverAt": null,
"aircraftProviderCode": "6E`5338` ``BOM`04/07/2021 08:30`BLR`04/07/2021 10:15``?6E` 409` ``BLR`04/07/2021 12:40`VTZ`04/07/2021 14:10``!0`R` `6E`RCIP`1070``0`45``X!!6E`320`1``KG`15`Economy``6E`2`0`False`0``````01h 45m`RCIP`R",
"aircraftBaggageUnit": "KG",
"aircraftBaggageWeight": "15",
"dte": "Kempegowda Bangalore International Airport Terminal 1",
"dcty": "Bangalore",
"dec": "IN",
"den": "Bangalore",
"ate": "Chhatrapati Shivaji International (Sahar International) Terminal 2",
"octy": "Mumbai",
"orc": "IN",
"aircraftRemarks": null
}, {
"ori": "BLR",
"des": "VTZ",
"adt": "Wed-07Apr2021",
"ddt": "Wed-07Apr2021",
"ati": "14:10",
"dti": "12:40",
"bcl": "R",
"ccl": "R",
"crr": "6E",
"crn": "Indigo",
"fnum": " 409",
"jdu": "05h 40m",
"oni": "14:10",
"flt": "1h 30m",
"ssk": "Non - Stop",
"fty": "Economy",
"rbdpnt": "0",
"cobrand_earn_flag": "false",
"airlineName": "6E",
"fkey": "0$0$39",
"aircraftLayoverAt": null,
"aircraftProviderCode": "6E`5338` ``BOM`04/07/2021 08:30`BLR`04/07/2021 10:15``?6E` 409` ``BLR`04/07/2021 12:40`VTZ`04/07/2021 14:10``!0`R` `6E`RCIP`1070``0`45``X!!6E`321```KG`15`Economy``6E`1`0`False`0``````01h 30m`RCIP`R",
"aircraftBaggageUnit": "KG",
"aircraftBaggageWeight": "15",
"dte": "Vishakhapatnam",
"dcty": "Vishakhapatnam",
"dec": "IN",
"den": "Vishakhapatnam",
"ate": "Kempegowda Bangalore International Airport Terminal 1",
"octy": "Bangalore",
"orc": "IN",
"aircraftRemarks": null
}],
"oth": {
"BondType": "OutBound",
"Deeplink": "",
"EngineID": 0,
"FareIndicator": 0,
"FareRule": "CAN-BEF 8760_72:3000|CAN-BEF 72_4:3500|CHG-BEF 8760_72:2500|CHG-BEF 72_4:3000|EMTFee-300|CANCEL-BEF 8760_72:3000|CANCEL-BEF 72_4:3500|CHANGE-BEF 8760_72:2500|CHANGE-BEF 72_4:3000",
"IsBaggageFare": false,
"IsCache": false,
"IsHoldBooking": false,
"IsInternational": false,
"IsRoundTrip": false,
"IsSpecial": false,
"IsSpecialId": false,
"ItineraryKey": "b7SqRwmVDL7OJQP+J0ySES9vx7BMZSMJFZdVwW+LEF0=",
"JourneyIndex": 0,
"MemoryCreationTime": "/Date(1614796293608+0530)/",
"NearByAirport": false,
"PromoCode": null,
"Remark": "",
"SSDetails": null,
"SearchId": "102,ADT:BF=3564.0000 Tax=667.00000,CHD:BF=3564.0000 Tax=667.00000,INF:BF=0 Tax=0,DFValue=0,APFV=4231.00000`667.00000`3564.0000`3000`2500`0`0`RCIP``KG`15`True`0`0`0`0`,CPFV=4231.00000`667.00000`3564.0000`3000`2500`0`0```KG`15`True`0`0`0`0`,IPFV=,ATB=`Tax:667.00000,CTB=`Tax:667.00000,ITB=,TBA=7128.0000,TTA=1334.00000,TFA=8462.00000",
"Sessionfilepath": null
},
"JPMiles": 400,
"JPMiles_Bouns": 0,
"JPMiles_Bouns1": "0",
"cancellation_JPFees": "299",
"change_flight_JPFees": 299,
"cancellation_airline_fee": 3000,
"change_flight_airline_fee": 2500,
"flt_price": 8462,
"flt_tax": 1334,
"flt_base": 7128,
"cnvfee": "245.76892300000",
"rpm": "0.45",
"multiplier": "0.4425",
"traveller": 2,
"Refundable": true,
"adults": 3564,
"childs": 3564,
"infants": 0,
"pax_discount": 0,
"noOfAdults": 1,
"noOfChilds": 1,
"noOfInfants": 0,
"adultmiles": 200,
"childmiles": 200,
"infantsmile": 200,
"ukey": "0$0$39",
"hky": "745276494d7542303161424f4d56545a313130323032312d30342d30373131524547522c464c4558313230",
"id": "123456390BOMBLR2021-04-07_08302021-04-07_10156E5338EconomyBLRVTZ2021-04-07_12402021-04-07_14106E_409EconomyREGR",
"commonIdentifier": "OutBound-6E5338-08:30-14:10",
"fareindicator": "REGR",
"flexiBenefitDetails": {
"CATEGORY_NAME": "Saver",
"CATEGORY_ID": "REGR",
"BENEFITS": [{
"sequnceNumber": 1,
"name": "Free date change allowed",
"desc": "No Date Change Allowed",
"isBenefit": false,
"image": "date-change-red.svg"
}, {
"sequnceNumber": 2,
"name": "Free seats available",
"desc": "No Free Seat",
"isBenefit": false,
"image": "seat-select-red.svg"
}, {
"sequnceNumber": 3,
"name": "Lower Cancellation fees",
"desc": "Cancellation Fees apply",
"isBenefit": false,
"image": "free-flight-cancelation-red.svg"
}, {
"sequnceNumber": 4,
"name": "Free meal available",
"desc": "No Free Meals",
"isBenefit": false,
"image": "no-meals-available-red.svg"
}]
},
"farebenefits": ["Cancellation fees apply", "Date change chargeable"],
"acd": "6E",
"flexibucket": [{
"Refundable": true,
"adultmiles": 200,
"childmiles": 200,
"infantsmile": 200,
"adults": 3564,
"childs": 3564,
"infants": 0,
"far": {
"tbf": 7128,
"bwm": 8462,
"twm": 1334,
"fbr": [{
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 3000,
"csh": 0,
"chp": 2500,
"ref": true,
"commission": 18,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": "RCIP",
"fke": "R",
"fin": "0~R~ ~6E~RCIP~1070~~0~45~~X",
"zc": false,
"mk": 0,
"ty": 0,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4231,
"tt": 667,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 3564,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 667,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}, {
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 3000,
"csh": 0,
"chp": 2500,
"ref": true,
"commission": 18,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": null,
"fke": null,
"fin": null,
"zc": false,
"mk": 0,
"ty": 1,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4231,
"tt": 667,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 3564,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 667,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}],
"bwm1": 8462,
"twm1": 1334,
"tbf1": 3564,
"tef": 8462
},
"cancellation_JPFees": "299",
"cancellation_airline_fee": 3000,
"change_flight_JPFees": 299,
"change_flight_airline_fee": 2500,
"flt_base": 7128,
"flt_tax": 1334,
"flt_price": 8462,
"hky": "745276494d7542303161424f4d56545a313130323032312d30342d30373131524547522c464c4558313230",
"id": "123456390BOMBLR2021-04-07_08302021-04-07_10156E5338EconomyBLRVTZ2021-04-07_12402021-04-07_14106E_409EconomyREGR",
"noOfAdults": 1,
"noOfChilds": 1,
"noOfInfants": 0,
"oth": {
"SSDetails": null,
"FareRule": "CAN-BEF 8760_72:3000|CAN-BEF 72_4:3500|CHG-BEF 8760_72:2500|CHG-BEF 72_4:3000|EMTFee-300|CANCEL-BEF 8760_72:3000|CANCEL-BEF 72_4:3500|CHANGE-BEF 8760_72:2500|CHANGE-BEF 72_4:3000"
},
"pax_discount": 0,
"traveller": 2,
"ukey": "0$0$39",
"flexiBenefitDetails": {
"CATEGORY_NAME": "Saver",
"CATEGORY_ID": "REGR",
"BENEFITS": [{
"sequnceNumber": 1,
"name": "Free date change allowed",
"desc": "No Date Change Allowed",
"isBenefit": false,
"image": "date-change-red.svg"
}, {
"sequnceNumber": 2,
"name": "Free seats available",
"desc": "No Free Seat",
"isBenefit": false,
"image": "seat-select-red.svg"
}, {
"sequnceNumber": 3,
"name": "Lower Cancellation fees",
"desc": "Cancellation Fees apply",
"isBenefit": false,
"image": "free-flight-cancelation-red.svg"
}, {
"sequnceNumber": 4,
"name": "Free meal available",
"desc": "No Free Meals",
"isBenefit": false,
"image": "no-meals-available-red.svg"
}]
},
"farebenfits": ["Cancellation fees apply", "Date change chargeable"],
"JPMiles": 400
}, {
"Refundable": true,
"adultmiles": 200,
"childmiles": 200,
"infantsmile": 200,
"adults": 4064,
"childs": 4064,
"infants": 0,
"far": {
"tbf": 8128,
"bwm": 9512,
"twm": 1384,
"fbr": [{
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 500,
"csh": 0,
"chp": 0,
"ref": true,
"commission": 20,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": "RUIP",
"fke": "R",
"fin": "0~R~ ~6E~RUIP~2009~~0~16~~X",
"zc": false,
"mk": 0,
"ty": 0,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4756,
"tt": 692,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 4064,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 692,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}, {
"bg": {
"ut": "KG",
"wt": "15"
},
"cn": {
"cnp": 500,
"csh": 0,
"chp": 0,
"ref": true,
"commission": 20,
"equivCurrencyCode": "",
"dFValue": "0"
},
"fco": null,
"fke": null,
"fin": null,
"zc": false,
"mk": 0,
"ty": 1,
"stf": 0,
"sf": 0,
"tds": 0,
"tf": 4756,
"tt": 692,
"tra": 0,
"trf": 0,
"zcc": 0,
"zcv": null,
"refundable": true,
"basicFare": 4064,
"baseTransactionAmount": 0,
"otax": [{
"Amount": 692,
"ChargeCode": "Tax",
"ChargeType": "Tax"
}],
"pax_discount": 0
}],
"bwm1": 9512,
"twm1": 1384,
"tbf1": 4064,
"tef": 9512
},
"cancellation_JPFees": "299",
"cancellation_airline_fee": 500,
"change_flight_JPFees": 299,
"change_flight_airline_fee": 0,
"flt_base": 8128,
"flt_tax": 1384,
"flt_price": 9512,
"hky": "745276494d7542303161424f4d56545a313130323032312d30342d30373131524547522c464c4558313230",
"id": "123456170BOMBLR2021-04-07_08302021-04-07_10156E5338EconomyBLRVTZ2021-04-07_12402021-04-07_14106E_409EconomyFLEX",
"noOfAdults": 1,
"noOfChilds": 1,
"noOfInfants": 0,
"oth": {
"SSDetails": null,
"FareRule": "CAN-BEF 8760_72:500|CAN-BEF 72_4:3500|CHG-BEF 8760_72:0|CHG-BEF 72_4:3000|EMTFee-300|CANCEL-BEF 8760_72:500|CANCEL-BEF 72_4:3500|CHANGE-BEF 8760_72:0|CHANGE-BEF 72_4:3000"
},
"pax_discount": 0,
"traveller": 2,
"ukey": "0$0$17",
"flexiBenefitDetails": {
"CATEGORY_NAME": "FlexiPlus",
"CATEGORY_ID": "FLEX",
"BENEFITS": [{
"sequnceNumber": 1,
"name": "Free date change allowed",
"desc": "Free Date Change Allowed",
"isBenefit": true,
"image": "date-change-green.svg"
}, {
"sequnceNumber": 2,
"name": "Free seats available",
"desc": "Free Seat Available",
"isBenefit": true,
"image": "seat-select-green.svg"
}, {
"sequnceNumber": 3,
"name": "Lower Cancellation fees",
"desc": "Lower Cancellation fees",
"isBenefit": true,
"image": "free-flight-cancelation-green.svg"
}, {
"sequnceNumber": 4,
"name": "Free meal available",
"desc": "Free Meals Available",
"isBenefit": true,
"image": "meals-available-green.svg"
}]
},
"farebenfits": ["Lower Cancellation fees", "Free date change allowed", "Free seats available", "Free meal available"],
"JPMiles": 400
}]
}
],
"SID": "424f4d56545a313130323032312d30342d30374439326430386462382d396233322d346335392d393136302d36636439363562346564626245"
}
]
^
^ \N ^ ^ \N ^ ^ 2021 - 03 - 04 00: 01: 36 ^ ^ \N ^ ^ 424 f4d56545a313130323032312d30342d30374439326430386462382d396233322d346335392d393136302d36636439363562346564626245 ^ ^ 92 d08db8 - 9 b32 - 4 c59 - 9160 - 6 cd965b4edbb
Я просмотрел несколько вопросов на Stack Overflow, но не нашёл там достаточно подсказок. В частности, я обращался к вопросу «Разбор структур JSON в txt-файле, содержащем JSON и текстовые структуры» (ссылка).
My code works for a small sample JSON file, as shown below:
import json

import pandas as pd

# Load the small sample document in one go. This is only suitable for files
# that comfortably fit in memory.
with open('SEARCH_LOGS.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One row per element of "onward_resp", with the top-level "SID" carried along.
df_nested_list = pd.json_normalize(
    data, record_path=['onward_resp'], meta=['SID'], errors='ignore')
# One row per element of each "leg" list.
df_nested_list_leg = pd.json_normalize(
    data, record_path=['onward_resp', 'leg'], errors='ignore')
# One row per element of each "far" -> "fbr" list.
df_nested_list_fbr = pd.json_normalize(
    data, record_path=['onward_resp', 'far', 'fbr'], errors='ignore')
# One row per element of each "otax" list.
# NOTE(review): this frame is built but never written to disk below -- confirm
# whether a fourth CSV was intended.
df_nested_list_otax = pd.json_normalize(
    data, record_path=['onward_resp', 'far', 'fbr', 'otax'], errors='ignore')

# Persist the flattened tables, header row included, without the index column.
df_nested_list.to_csv('FPSEARCHLOG.csv', header=True, index=False)
df_nested_list_leg.to_csv('FPSEARCHLOGLEG.csv', header=True, index=False)
df_nested_list_fbr.to_csv('FPSEARCHLOGLEGFBR.csv', header=True, index=False)
А приведённый ниже код зависает на файле размером 3 ГБ:
import re
import json


def iter_json_objects(stream, chunk_size=1 << 20):
    """Yield every JSON object (``{...}``) embedded in a text stream.

    The stream is read in chunks of ``chunk_size`` characters, so the whole
    file is never held in memory. Each object is parsed once with
    ``JSONDecoder.raw_decode`` instead of being re-parsed after every closing
    brace, which is what made the split-on-braces approach quadratic.

    Args:
        stream: A text-mode file-like object exposing ``read(n)``.
        chunk_size: Number of characters to request per read.

    Yields:
        Each decoded object (a ``dict``), in the order it appears. Text
        between objects (delimiters, ``[``/``]``, plain fields) is skipped.
    """
    decoder = json.JSONDecoder()
    buffer = ""
    exhausted = False  # True once read() has returned an empty string.
    while True:
        start = buffer.find("{")
        if start == -1:
            # No candidate left in the buffer: drop it and fetch fresh text.
            if exhausted:
                return
            buffer = stream.read(chunk_size)
            exhausted = not buffer
            continue
        try:
            obj, end = decoder.raw_decode(buffer, start)
        except json.JSONDecodeError:
            if exhausted:
                # Nothing more can arrive, so this "{" does not open a valid
                # object; step over it and keep scanning.
                buffer = buffer[start + 1:]
            else:
                # The object is most likely cut off at the chunk boundary:
                # keep the candidate and append more text before retrying.
                # A stray "{" that never forms valid JSON keeps growing the
                # buffer until end of input before it is skipped.
                more = stream.read(chunk_size)
                exhausted = not more
                buffer = buffer[start:] + more
            continue
        yield obj
        # Resume scanning right after the object that was just decoded.
        buffer = buffer[end:]


if __name__ == "__main__":
    import sys

    # Usage: python script.py <path-to-log-file>
    with open(sys.argv[1], "r", encoding="utf-8") as f:
        for record in iter_json_objects(f):
            print(record)
Примечание. Всё, что мне нужно, — это итоговый CSV-файл, в котором каждому ключу из всего текстового файла объёмом 3 ГБ соответствует столбец, а значениям — строки.