Skip to content

Commit

Permalink
Fix IA attachment description keys
Browse files (browse the repository at this point in the history)
  • Loading branch information
ttys0dev committed Aug 28, 2024
1 parent e57f9b8 commit 18dc1e9
Show file tree
Hide file tree
Showing 13 changed files with 360 additions and 536 deletions.
12 changes: 8 additions & 4 deletions juriscraper/pacer/internet_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,6 @@ def docket_entries(self):
         for de_node in de_nodes:
             de = {
                 "document_number": de_node.xpath("./@doc_num")[0],
-                "description": self._xpath_text_0(de_node, "./long_desc"),
-                "short_description": self._xpath_text_0(
-                    de_node, "./short_desc"
-                ),
                 "pacer_seq_no": self._xpath_text_0(
                     de_node, "./pacer_de_seq_num"
                 )
Expand All @@ -191,6 +187,14 @@ def docket_entries(self):
             attachment_number = de_node.xpath("./@attachment_num")[0]
             if attachment_number != "0":
                 de["attachment_number"] = attachment_number
+                de["description"] = self._xpath_text_0(
+                    de_node, "./short_desc"
+                )
+            else:
+                de["description"] = self._xpath_text_0(de_node, "./long_desc")
+                de["short_description"] = self._xpath_text_0(
+                    de_node, "./short_desc"
+                )
 
             date_filed_str = self._xpath_text_0(de_node, "./date_filed")
             if date_filed_str:
Expand Down
25 changes: 10 additions & 15 deletions tests/examples/pacer/dockets_internet_archive/almd_49523.json
Original file line number Diff line number Diff line change
Expand Up @@ -1150,11 +1150,10 @@
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"description": "Advertisement Certification Report and Notice of Forfeiture",
"document_number": "625",
"pacer_doc_id": "01702313993",
"pacer_seq_no": null,
"short_description": "Advertisement Certification Report and Notice of Forfeiture"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand All @@ -1169,11 +1168,10 @@
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"description": "Exhibit A",
"document_number": "626",
"pacer_doc_id": "01702314758",
"pacer_seq_no": null,
"short_description": "Exhibit A"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand All @@ -1196,11 +1194,10 @@
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"description": "Exhibit A",
"document_number": "629",
"pacer_doc_id": "01702315696",
"pacer_seq_no": null,
"short_description": "Exhibit A"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand Down Expand Up @@ -1239,20 +1236,18 @@
{
"attachment_number": "1",
"date_filed": "2014-07-28",
"description": "",
"description": "Stipulation of Final Settlement and Release of All Claims as to Third Party Pet",
"document_number": "635",
"pacer_doc_id": "01702325954",
"pacer_seq_no": null,
"short_description": "Stipulation of Final Settlement and Release of All Claims as to Third Party Pet"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2014-07-28",
"description": "",
"description": "Text of Proposed Order",
"document_number": "635",
"pacer_doc_id": "01702325955",
"pacer_seq_no": null,
"short_description": "Text of Proposed Order"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-28",
Expand Down
40 changes: 16 additions & 24 deletions tests/examples/pacer/dockets_internet_archive/azd_1061043.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,74 +14,66 @@
{
"attachment_number": "1",
"date_filed": "2017-10-25",
"description": "",
"description": "Civil Cover Sheet",
"document_number": "1",
"pacer_doc_id": "025018027677",
"pacer_seq_no": null,
"short_description": "Civil Cover Sheet"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027678",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "3",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027679",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "4",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027680",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "5",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027681",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "6",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027682",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "7",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027683",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "8",
"date_filed": "2017-10-25",
"description": "",
"description": "Exhibit",
"document_number": "1",
"pacer_doc_id": "025018027684",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
}
],
"date_filed": "2017-10-25",
Expand Down
84 changes: 33 additions & 51 deletions tests/examples/pacer/dockets_internet_archive/azd_318008.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
{
"attachment_number": "1",
"date_filed": "2006-09-07",
"description": "",
"description": "Civil Cover Sheet",
"document_number": "1",
"pacer_doc_id": "02501840501",
"pacer_seq_no": null,
"short_description": "Civil Cover Sheet"
"pacer_seq_no": null
}
],
"date_filed": "2006-09-07",
Expand Down Expand Up @@ -553,74 +552,66 @@
{
"attachment_number": "1",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit A: Riverdeep Complaint",
"document_number": "67",
"pacer_doc_id": "02502837709",
"pacer_seq_no": null,
"short_description": "Exhibit A: Riverdeep Complaint"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit B: Westlaw Cases",
"document_number": "67",
"pacer_doc_id": "02502837710",
"pacer_seq_no": null,
"short_description": "Exhibit B: Westlaw Cases"
"pacer_seq_no": null
},
{
"attachment_number": "3",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit C - Midwest's Answer's to Plaintiff's Second Set of Inter",
"document_number": "67",
"pacer_doc_id": "02502837711",
"pacer_seq_no": null,
"short_description": "Exhibit C - Midwest's Answer's to Plaintiff's Second Set of Inter"
"pacer_seq_no": null
},
{
"attachment_number": "4",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit D: 071130 Midwest's Settlement Letter",
"document_number": "67",
"pacer_doc_id": "02502837712",
"pacer_seq_no": null,
"short_description": "Exhibit D: 071130 Midwest's Settlement Letter"
"pacer_seq_no": null
},
{
"attachment_number": "5",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit E - Midwest's Settlement Conference Memorandum",
"document_number": "67",
"pacer_doc_id": "02502837713",
"pacer_seq_no": null,
"short_description": "Exhibit E - Midwest's Settlement Conference Memorandum"
"pacer_seq_no": null
},
{
"attachment_number": "6",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit F - Excerpts from Deposition of Robert Vitale1",
"document_number": "67",
"pacer_doc_id": "02502837714",
"pacer_seq_no": null,
"short_description": "Exhibit F - Excerpts from Deposition of Robert Vitale1"
"pacer_seq_no": null
},
{
"attachment_number": "7",
"date_filed": "2008-03-26",
"description": "",
"description": "Exhibit G - U.S. Trademark Registration No. 3,318,243",
"document_number": "67",
"pacer_doc_id": "02502837715",
"pacer_seq_no": null,
"short_description": "Exhibit G - U.S. Trademark Registration No. 3,318,243"
"pacer_seq_no": null
},
{
"attachment_number": "8",
"date_filed": "2008-03-26",
"description": "",
"description": "Text of Proposed Order",
"document_number": "67",
"pacer_doc_id": "02502837716",
"pacer_seq_no": null,
"short_description": "Text of Proposed Order"
"pacer_seq_no": null
}
],
"date_filed": "2008-03-26",
Expand Down Expand Up @@ -715,11 +706,10 @@
{
"attachment_number": "1",
"date_filed": "2008-05-09",
"description": "",
"description": "Memorandum in Support of Motion for Partial Summary Judgment",
"document_number": "78",
"pacer_doc_id": "02502995843",
"pacer_seq_no": null,
"short_description": "Memorandum in Support of Motion for Partial Summary Judgment"
"pacer_seq_no": null
}
],
"date_filed": "2008-05-09",
Expand All @@ -734,29 +724,26 @@
{
"attachment_number": "1",
"date_filed": "2008-05-09",
"description": "",
"description": "Supplement Statement of Facts in Support of Plaintiff's Motion for Summary",
"document_number": "79",
"pacer_doc_id": "02502996134",
"pacer_seq_no": null,
"short_description": "Supplement Statement of Facts in Support of Plaintiff's Motion for Summary"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2008-05-09",
"description": "",
"description": "Exhibit Exhibits 1-7 to SOF in Support of Motion for Summary Judgment",
"document_number": "79",
"pacer_doc_id": "02502996135",
"pacer_seq_no": null,
"short_description": "Exhibit Exhibits 1-7 to SOF in Support of Motion for Summary Judgment"
"pacer_seq_no": null
},
{
"attachment_number": "3",
"date_filed": "2008-05-09",
"description": "",
"description": "Exhibit Exhibits 7-14 to SOF in Support of Motion for Summary Judgment",
"document_number": "79",
"pacer_doc_id": "02502996136",
"pacer_seq_no": null,
"short_description": "Exhibit Exhibits 7-14 to SOF in Support of Motion for Summary Judgment"
"pacer_seq_no": null
}
],
"date_filed": "2008-05-09",
Expand Down Expand Up @@ -1435,20 +1422,18 @@
{
"attachment_number": "1",
"date_filed": "2009-03-16",
"description": "Attachment 1SEALED Minute Entry. Proceedings held before Judge David G Campbell on 3/16/2009. (Court Reporter Patricia Lyons.) (cc: Dosek/Marvinney/CD)(MAP)",
"description": "",
"document_number": "163",
"pacer_doc_id": "02513170767",
"pacer_seq_no": null,
"short_description": ""
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2009-03-16",
"description": "Attachment 2SEALED Minute Entry. Proceedings held before Judge David G Campbell on 3/16/2009. (Court Reporter Patricia Lyons.) (cc: Dosek/Marvinney/CD)(MAP)",
"description": "",
"document_number": "163",
"pacer_doc_id": "02513170768",
"pacer_seq_no": null,
"short_description": ""
"pacer_seq_no": null
}
],
"date_filed": "2009-03-16",
Expand Down Expand Up @@ -1522,26 +1507,23 @@
"description": "",
"document_number": "195",
"pacer_doc_id": "02513307026",
"pacer_seq_no": null,
"short_description": ""
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": null,
"description": "",
"document_number": "195",
"pacer_doc_id": "02513307027",
"pacer_seq_no": null,
"short_description": ""
"pacer_seq_no": null
},
{
"attachment_number": "3",
"date_filed": null,
"description": "",
"document_number": "195",
"pacer_doc_id": "02513307028",
"pacer_seq_no": null,
"short_description": ""
"pacer_seq_no": null
}
],
"date_filed": null,
Expand Down
Loading

0 comments on commit 18dc1e9

Please sign in to comment.