From 147b08ce498869f8416b77385c5ba534afd6fa2b Mon Sep 17 00:00:00 2001 From: marwoodandrew Date: Mon, 3 Jul 2023 09:56:22 +1000 Subject: [PATCH] SDAAP-75 Add function to remove embedded media content --- superdesk/default_settings.py | 6 + superdesk/editor_utils.py | 35 + superdesk/publish/formatters/__init__.py | 1 + .../publish/formatters/email_formatter.py | 2 + .../publish/formatters/ninjs_ftp_formatter.py | 253 +++++- .../formatters/email_formatter_test.py | 56 ++ .../formatters/ninjs_ftp_formatter_test.py | 841 +++++++++++++++++- 7 files changed, 1140 insertions(+), 54 deletions(-) diff --git a/superdesk/default_settings.py b/superdesk/default_settings.py index 74d4340c20..6dec4e6322 100644 --- a/superdesk/default_settings.py +++ b/superdesk/default_settings.py @@ -1062,3 +1062,9 @@ def local_to_utc_hour(hour): #: .. versionadded:: 2.4.1 #: APM_SERVICE_NAME = env("APM_SERVICE_NAME") + +#: Apply product filtering to embedded media items +#: +#: .. versionadded:: +#: +EMBED_PRODUCT_FILTERING = strtobool(env("EMBED_PRODUCT_FILTERING", "false")) diff --git a/superdesk/editor_utils.py b/superdesk/editor_utils.py index f77713f901..364cae3170 100644 --- a/superdesk/editor_utils.py +++ b/superdesk/editor_utils.py @@ -167,6 +167,10 @@ def insert(self, index, value): self._ranges.insert(index, value.ranges) self._mapping[value.key] = value.data + def clear(self): + for idx, kk in enumerate(self._ranges): + del self[kk.get("key")] + class Block: """Abstraction of DraftJS block""" @@ -855,3 +859,34 @@ def copy_fields(source: Dict, dest: Dict, ignore_empty: bool = False): for field in source["fields_meta"]: if ignore_empty is False or not is_empty_content_state(source, field): dest.setdefault("fields_meta", {})[field] = source["fields_meta"][field].copy() + + +def remove_all_embeds(article): + """ + Removes any embeds from the draftjs state and regenerates the html, can be used by text only + formatters to remove embeds from the article + :param article: + :return: + """ + + # List of keys of the removed entities + keys = [] + + def not_embed(block): + if block.type.lower() == "atomic": + keys.extend([e.key for e in block.entities]) + block.entities.clear() + return False + return True + + fields = get_content_state_fields(article) + for field in fields: + filter_blocks(article, field, not_embed) + + # Remove the corresponding items from the associations and refs + for key_suffix in keys: + key = "editor_{}".format(key_suffix) + if article.get("associations", {}).get(key): + article.get("associations").pop(key) + if "refs" in article: + article["refs"] = [r for r in article.get("refs", []) if r["key"] != key] diff --git a/superdesk/publish/formatters/__init__.py b/superdesk/publish/formatters/__init__.py index c921699bd9..6fcef7f71d 100644 --- a/superdesk/publish/formatters/__init__.py +++ b/superdesk/publish/formatters/__init__.py @@ -15,6 +15,7 @@ from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, FORMATS, FORMAT from superdesk.etree import parse_html from superdesk.text_utils import get_text +from superdesk.editor_utils import get_content_state_fields, filter_blocks formatters = [] # type: List[Type[Formatter]] diff --git a/superdesk/publish/formatters/email_formatter.py b/superdesk/publish/formatters/email_formatter.py index c4472a205c..a6981a0f07 100644 --- a/superdesk/publish/formatters/email_formatter.py +++ b/superdesk/publish/formatters/email_formatter.py @@ -17,6 +17,7 @@ from copy import deepcopy from superdesk.errors import FormatterError from superdesk import etree as sd_etree +from superdesk.editor_utils import remove_all_embeds class EmailFormatter(Formatter): @@ -55,6 +56,7 @@ def _inject_dateline(self, formatted_article): def format(self, article, subscriber, codes=None): formatted_article = deepcopy(article) + remove_all_embeds(formatted_article) pub_seq_num = superdesk.get_resource_service("subscribers").generate_sequence_number(subscriber) doc = {} try: diff --git a/superdesk/publish/formatters/ninjs_ftp_formatter.py b/superdesk/publish/formatters/ninjs_ftp_formatter.py index 55b836b8bc..639cb218fb 100644 --- a/superdesk/publish/formatters/ninjs_ftp_formatter.py +++ b/superdesk/publish/formatters/ninjs_ftp_formatter.py @@ -10,11 +10,15 @@ from .ninjs_formatter import NINJSFormatter +from flask import current_app as app from superdesk.media.renditions import get_rendition_file_name -from lxml import html as lxml_html -from superdesk.etree import to_string +from superdesk import get_resource_service +from superdesk.editor_utils import get_content_state_fields, Editor3Content, DraftJSHTMLExporter +from superdesk.media.renditions import get_renditions_spec +from draftjs_exporter.dom import DOM +from copy import deepcopy +from textwrap import dedent import logging -import re logger = logging.getLogger(__name__) @@ -28,15 +32,7 @@ def __init__(self): super().__init__() self.format_type = "ftp ninjs" self.internal_renditions = [] - - def _get_source_ref(self, marker, ninjs): - try: - return ninjs.get("associations").get(marker).get("renditions").get("original").get("href") - except Exception: - logger.warning( - "href not found for the original in FTP NINJS formatter, ensure the formatter has it enabled" - ) - return None + self.path = None def _transform_to_ninjs(self, article, subscriber, recursive=True): """ @@ -47,48 +43,215 @@ def _transform_to_ninjs(self, article, subscriber, recursive=True): :param recursive: :return: """ - + # Get the path that the renditions will be pushed to + self.path = subscriber.get("destinations")[0].get("config").get("associated_path") include_original = subscriber.get("destinations")[0].get("config").get("include_original", False) if include_original: self.internal_renditions = ["original"] - ninjs = super()._transform_to_ninjs(article, subscriber, recursive) + formatted_article = deepcopy(article) - # Get the path that the renditions will be pushed to - path = subscriber.get("destinations")[0].get("config").get("associated_path") + if article.get("type") == "text" and recursive: + self.apply_product_filtering_to_associations(formatted_article, subscriber) + + ninjs = super()._transform_to_ninjs(formatted_article, subscriber, recursive) + + renditions = ninjs.get("renditions") + if renditions: + for name, rendition in renditions.items(): + rendition["href"] = ( + self.path.lstrip("/") + + ("/" if not self.path.endswith("/") and self.path else "") + + get_rendition_file_name(rendition) + ) + + return ninjs + + def apply_product_filtering_to_associations(self, article, subscriber): + """ + Remove the embedded items from the article that the subscriber has no matching product for. + :param article: + :param subscriber: + :return: + """ + if not app.config["EMBED_PRODUCT_FILTERING"]: + return + + remove_keys = [] + permitted_products = set(subscriber["products"]) + + for key, item in article.get("associations", {}).items(): + if key.startswith("editor_"): + result = get_resource_service("product_tests").test_products(item, lookup=None) + matching_products = set(p["product_id"] for p in result if p.get("matched", False)) + if not matching_products.intersection(permitted_products): + remove_keys.append(key) + + self.remove_embeds(article, remove_keys) + + def remove_embeds(self, article, remove_keys): + """ + Removes the nominated embeds from the draftjs state and regenerates the HTML. + :param article: + :param remove_keys + :return: + """ + + to_remove = [k.lstrip("editor_") for k in remove_keys] + + def not_embed(block): + if block.type.lower() == "atomic": + bk = [e.key for e in block.entities if e.key in to_remove] + if bk: + return False + return True + + fields = get_content_state_fields(article) + for field in fields: + self.filter_blocks(article, field, not_embed) + + for key in remove_keys: + article.get("associations", {}).pop(key, None) + if "refs" in article: + article["refs"] = [r for r in article.get("refs", []) if r["key"] != key] + + def filter_blocks(self, item, field, filter, is_html=True): + editor = Editor3Content(item, field, is_html) + # assign special Ninjs FTP exporter + exporter = NinjsFTPExporter(editor) + exporter.set_formatter(self) + editor.html_exporter = exporter + blocks = [] + for block in editor.blocks: + if filter(block): + blocks.append(block) + editor.set_blocks(blocks) + editor.update_item() + + +class NinjsFTPExporter(DraftJSHTMLExporter): + + formatter = None + + def set_formatter(self, formatter): + self.formatter = formatter + + def render_media(self, props): + # we need to retrieve the key, there is not straightforward way to do it + # so we find the key in entityMap with a corresponding value + embed_key = next( + k for k, v in self.content_state["entityMap"].items() if v["data"].get("media") == props["media"] + ) + media_props = props["media"] + media_type = media_props.get("type", "picture") + + rendition = media_props["renditions"].get("original") or media_props["renditions"]["viewImage"] + alt_text = media_props.get("alt_text") or "" + desc = media_props.get("description_text") + if media_type == "picture": + path = self.formatter.path + + renditions_to_publish = self.formatter.internal_renditions + list( + get_renditions_spec(without_internal_renditions=True).keys() + ) + + renditions = media_props.get("renditions") + # filter the renditions for those we wish to publish + renditions = {name: rendition for name, rendition in renditions.items() if name in renditions_to_publish} - if path: - renditions = ninjs.get("renditions") if renditions: for name, rendition in renditions.items(): rendition["href"] = ( - "/" - + path.lstrip("/") - + ("/" if not path.endswith("/") else "") + path.lstrip("/") + + ("/" if not path.endswith("/") and path else "") + get_rendition_file_name(rendition) ) - if article.get("type", "") == "text": - # Find any embeded image references in the body_html and re-wire the img src reference and insert an id - html_updated = False - root_elem = lxml_html.fromstring(ninjs.get("body_html")) - # Scan any comments for embed markers - comments = root_elem.xpath("//comment()") - for comment in comments: - if "EMBED START Image" in comment.text: - regex = r" +
{content}
+ """ + ).format(embed_type=embed_type, key=embed_key, content=content) + ) + + return embed + + def get_source_ref(self, renditions): + try: + return renditions.get("original").get("href") + except Exception: + widest = -1 + src_rendition = "" + for rendition in renditions: + width = renditions.get(rendition).get("width") + if width > widest: + widest = width + src_rendition = rendition + + if widest > 0: + return renditions.get(src_rendition).get("href").lstrip("/") + + logger.warning("href not found in FTP NINJS formatter, ensure the formatter has it enabled") + return None + + def get_source_set_refs(self, renditions): + try: + srcset = [] + for rendition in renditions: + srcset.append( + renditions.get(rendition).get("href").lstrip("/") + + " " + + str(renditions.get(rendition).get("width")) + + "w" + ) + return ",".join(srcset) + except Exception: + return None diff --git a/tests/publish/formatters/email_formatter_test.py b/tests/publish/formatters/email_formatter_test.py index 03a3fee58f..5fed0d883f 100644 --- a/tests/publish/formatters/email_formatter_test.py +++ b/tests/publish/formatters/email_formatter_test.py @@ -335,3 +335,59 @@ def test_unbroken_html(self): item = json.loads(doc) self.assertIn("

abcdefghijklmnopqrstuvwxyz

\r\n", item.get("message_html")) + + def test_remove_embedded_content(self): + article = { + "_id": "urn:newsml:localhost:2023-05-10T14:28:37.121795:62fc7a2b-a69a-4c47-8540-49c075a4d62c", + "body_html": '

pre amble

\n\n
' + '
" + '\n

post amble

', + "headline": "Budget with a pic", + "fields_meta": { + "body_html": { + "draftjsState": [ + { + "blocks": [ + { + "key": "77gu7", + "text": " pre amble", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {"MULTIPLE_HIGHLIGHTS": {}}, + }, + { + "key": "cj518", + "text": " ", + "type": "atomic", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [{"offset": 0, "length": 1, "key": 0}], + "data": {}, + }, + { + "key": "ambpd", + "text": "post amble", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {}, + }, + ], + "entityMap": {"0": {"type": "MEDIA", "mutability": "MUTABLE", "data": {"media": {}}}}, + } + ] + } + }, + "type": "text", + "pubstatus": "usable", + "format": "HTML", + "guid": "urn:newsml:localhost:2023-05-10T14:28:37.121795:62fc7a2b-a69a-4c47-8540-49c075a4d62c", + } + seq, doc = self.formatter.format(article, {"name": "Test Subscriber"})[0] + self.assertNotIn("EMBED", doc) + self.assertNotIn("Some text

\n\n
\n ' - ' Power plant\n
Steam rises from the' - ' brown coal-fired power plant
\n
\n\n

More text

", + "body_html": '

Some text

\n\n
' + 'Power plant' + "
Steam rises from the brown coal-fired power plant
\n" + '\n

More text

', "service": [{"name": "Australian General News", "code": "a"}], "version": "1", } self.assertEqual(expected, json.loads(doc)) + + def test_embedded_image_rendition_set(self): + self.app.data.insert( + "vocabularies", + [ + { + "_id": "crop_sizes", + "items": [ + {"is_active": True, "name": "4-3", "width": 800, "height": 600}, + {"is_active": True, "name": "16-9", "width": 1280, "height": 720}, + ], + } + ], + ) + self.app.data.insert( + "filter_conditions", + [ + {"_id": 1, "field": "type", "operator": "eq", "value": "picture", "name": "Picture fc"}, + {"_id": 2, "field": "type", "operator": "eq", "value": "video", "name": "Video fc"}, + ], + ) + self.app.data.insert( + "content_filters", + [ + { + "_id": 3, + "content_filter": [{"expression": {"fc": [1]}}, {"expression": {"fc": [2]}}], + "name": "Picture cf", + } + ], + ) + self.app.data.insert( + "products", + [ + { + "_id": 1, + "content_filter": {"filter_id": 3, "filter_type": "permitting"}, + "name": "Picture and Video", + "product_type": "both", + } + ], + ) + article = { + "_id": "urn:newsml:localhost:2020-03-12T15:19:39.654956:e78f3dd6-c096-43d5-9ba0-014e07dc4f1f", + "associations": { + "editor_2": { + "_id": "urn:newsml:aap.com.au:2023-03-08T10:34:33.291350:bda807fd-466c-4861-a58f-6e18b0d59efd", + "media": "20230308100332/video.mp4", + "type": "video", + "pubstatus": "usable", + "renditions": { + "original": { + "href": "http://acme.com.au/api/upload-raw/20230308100332/video.mp4", + "media": "20230308100332/video.mp4", + "mimetype": "video/mp4", + } + }, + }, + "editor_0": { + "type": "picture", + "_id": "tag:localhost:2019:5c9da5e1-f491-4ffd-a0b5-dd54e9fd870f", + "renditions": { + "original": { + "mimetype": "image/jpeg", + "href": "http://acme.com.au/api/upload-raw/20190405160436/original.jpg", + "width": 4500, + "height": 3000, + "poi": {"y": 1770, "x": 2160}, + "media": "20190405160436/original.jpg", + }, + "4-3": { + "href": "http://acme.aap.com.au/api/upload-raw/fourthree.jpg", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "20230303110348/fourthree.jpg", + }, + "16-9": { + "href": "http://acme.com.au/api/upload-raw/sixteennine.jpg", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "20230303110348/sixteennine.jpg", + }, + }, + }, + }, + "type": "text", + "headline": "Test Headline", + "body_html": '

Some text

\n\n
\n ' + 'Power plant\n ' + "
Steam rises from the brown coal-fired power plant
\n
\n" + '\n

More text

and a video

' + '\n
\n
" + '\n', + } + editor = Editor3Content(article, "body_html") + editor.update_item() + seq, doc = self.formatter.format( + article, + { + "name": "Test Subscriber", + "destinations": [ + { + "config": { + "host": "ftp.abc.com", + "path": "/stories", + "associated_path": "/pictures", + "push_associated": True, + "include_original": False, + } + } + ], + "products": [1], + }, + )[0] + expected = { + "guid": None, + "version": "1", + "type": "text", + "headline": "Test Headline", + "body_html": '

Some text

\n\n
' + '' + '
\n\n' + '

More text

\n

and a video

\n\n' + '
' + '
\n', + "associations": { + "editor_2": { + "guid": None, + "version": "1", + "type": "video", + "pubstatus": "usable", + "priority": 5, + "renditions": { + "original": { + "href": "pictures/20230308100332-video.mp4", + "mimetype": "video/mp4", + "media": "20230308100332/video.mp4", + } + }, + }, + "editor_0": { + "guid": None, + "version": "1", + "type": "picture", + "priority": 5, + "renditions": { + "4-3": { + "href": "pictures/20230303110348-fourthree.jpg", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "20230303110348/fourthree.jpg", + }, + "16-9": { + "href": "pictures/20230303110348-sixteennine.jpg", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "20230303110348/sixteennine.jpg", + }, + }, + }, + }, + "priority": 5, + "charcount": 30, + "wordcount": 7, + "readtime": 0, + } + self.assertEqual(expected, json.loads(doc)) + + def test_product_match(self): + self.app.data.insert( + "filter_conditions", + [{"_id": 1, "field": "type", "operator": "eq", "value": "video", "name": "ALL Video fc"}], + ) + self.app.data.insert( + "content_filters", + [{"_id": 3, "content_filter": [{"expression": {"fc": [1]}}], "name": "All Video cf"}], + ) + self.app.data.insert( + "products", + [ + { + "_id": 1, + "content_filter": {"filter_id": 3, "filter_type": "permitting"}, + "name": "All Video", + "product_type": "both", + } + ], + ) + self.app.data.insert( + "vocabularies", + [ + { + "_id": "crop_sizes", + "items": [ + {"is_active": True, "name": "4-3", "width": 800, "height": 600}, + {"is_active": True, "name": "16-9", "width": 1280, "height": 720}, + ], + } + ], + ) + item = { + "_id": "urn:newsml:localhost:2023-06-07T11:24:33.346929:f46b4120-deab-4798-9a1b-32b7888ca05c", + "type": "text", + "version": 1, + "pubstatus": "usable", + "format": "HTML", + "_current_version": 5, + "firstcreated": "2023-06-07T01:24:33.000Z", + "versioncreated": "2023-06-07T01:31:02.000Z", + "guid": "urn:newsml:localhost:2023-06-07T11:24:33.346929:f46b4120-deab-4798-9a1b-32b7888ca05c", + "unique_id": 41882, + "unique_name": "#41882", + "family_id": "urn:newsml:localhost:2023-06-07T11:24:33.346929:f46b4120-deab-4798-9a1b-32b7888ca05c", + "state": "published", + "source": "AAP", + "priority": 6, + "urgency": 5, + "genre": [{"qcode": "Article", "name": "Article"}], + "dateline": { + "source": "AAP", + "date": "2023-06-07T01:24:33.000Z", + "located": { + "city": "Wagga Wagga", + "state": "New South Wales", + "dateline": "city", + "tz": "Australia/Sydney", + "city_code": "Wagga Wagga", + "country_code": "AU", + "alt_name": "", + "country": "Australia", + "state_code": "NSW", + }, + "text": "WAGGA WAGGA, June 7 AAP -", + }, + "byline": "Joe Black", + "place": [ + { + "country": "Australia", + "world_region": "Oceania", + "name": "QLD", + "state": "Queensland", + "group": "Australia", + "qcode": "QLD", + } + ], + "language": "en", + "operation": "publish", + "anpa_category": [{"name": "Australian General News", "qcode": "a"}], + "associations": { + "featuremedia": { + "_id": "tag:localhost:2022:5e76af3a-4575-4108-a5bf-d0c5664422ee", + "guid": "tag:localhost:2022:5e76af3a-4575-4108-a5bf-d0c5664422ee", + "headline": "Italy F1 GP Auto Racing", + "description_text": "Pole position Ferrari", + "archive_description": "Pole position Ferrari driver Charles Leclerc", + "source": "AP", + "original_source": "AP/Reuters", + "versioncreated": "2022-12-20T06:14:22+0000", + "firstcreated": "2022-09-10T15:10:00+0000", + "pubstatus": "usable", + "type": "picture", + "renditions": { + "original": { + "href": "http://localhost:5000/api/upload-raw/63a15287e6c3ec2d2df751c5.jpg", + "media": "63a15287e6c3ec2d2df751c5", + "mimetype": "image/jpeg", + "width": 3150, + "height": 2100, + }, + "baseImage": { + "href": "http://localhost:5000/api/upload-raw/63a1528ae6c3ec2d2df751cc?_schema=http", + "media": "63a1528ae6c3ec2d2df751cc", + "mimetype": "image/jpeg", + "width": 1400, + "height": 933, + }, + "thumbnail": { + "href": "http://localhost:5000/api/upload-raw/63a1528ae6c3ec2d2df751ce?_schema=http", + "media": "63a1528ae6c3ec2d2df751ce", + "mimetype": "image/jpeg", + "width": 180, + "height": 120, + }, + "viewImage": { + "href": "http://localhost:5000/api/upload-raw/63a1528ae6c3ec2d2df751d0?_schema=http", + "media": "63a1528ae6c3ec2d2df751d0", + "mimetype": "image/jpeg", + "width": 640, + "height": 426, + }, + "4-3": { + "width": 800, + "height": 600, + "href": "http://localhost:5000/api/upload-raw/647fdcdbae4a8a99d5d4f167.jpg", + "media": "647fdcdbae4a8a99d5d4f167", + "mimetype": "image/jpeg", + }, + "16-9": { + "width": 1280, + "height": 720, + "href": "http://localhost:5000/api/upload-raw/647fdcdbae4a8a99d5d4f16a.jpg", + "media": "647fdcdbae4a8a99d5d4f16a", + "mimetype": "image/jpeg", + }, + }, + "state": "published", + "slugline": "Video Test", + "byline": "AP PHOTO", + "ednote": "POOL IMAGE", + "mimetype": "image/jpeg", + "uri": "20220911001701433030", + "unique_id": 41837, + "unique_name": "#41837", + "_current_version": 3, + "operation": "publish", + "format": "HTML", + "priority": 6, + "urgency": 5, + "genre": [{"qcode": "Article", "name": "Article"}], + "language": "en", + "_type": "archive", + "_latest_version": 3, + "alt_text": "Italy F1 GP Auto Racing", + "anpa_category": [{"name": "Australian General News", "qcode": "a"}], + "subject": [ + { + "qcode": "05010004", + "name": "test/examination", + "parent": "05010000", + } + ], + "abstract": "

Story abstract

", + }, + "editor_0": { + "_id": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "media": "647fdaebae4a8a99d5d4eab5", + "type": "video", + "pubstatus": "usable", + "format": "HTML", + "_current_version": 2, + "firstcreated": "2023-06-07T01:18:32+0000", + "versioncreated": "2023-06-07T01:18:46+0000", + "guid": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "unique_id": 41881, + "unique_name": "#41881", + "family_id": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "state": "published", + "source": "AAP", + "priority": 6, + "urgency": 5, + "genre": [{"qcode": "Article", "name": "Article"}], + "byline": "Joe Black", + "place": [ + { + "country": "Australia", + "world_region": "Oceania", + "name": "QLD", + "state": "Queensland", + "group": "Australia", + "qcode": "QLD", + } + ], + "language": "en", + "operation": "publish", + "renditions": { + "original": { + "href": "http://localhost:5000/api/upload-raw/647fdaebae4a8a99d5d4eab5.mp4", + "media": "647fdaebae4a8a99d5d4eab5", + "mimetype": "video/mp4", + } + }, + "mimetype": "video/mp4", + "alt_text": "Video Alt text", + "description_text": "Test video decsription", + "expiry": "2023-07-18T17:18:46+0000", + "headline": "Test video headline", + "version": 2, + "_latest_version": 2, + "anpa_category": [{"name": "Australian General News", "qcode": "a"}], + "subject": [ + { + "qcode": "05010004", + "name": "test/examination", + "parent": "05010000", + } + ], + "slugline": "Video Test", + "abstract": "

Story abstract

", + }, + "editor_1": { + "_id": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "guid": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "headline": "Germany Daily Life", + "description_text": "A Valais black-nosed sheep jumps", + "archive_description": "A Valais black-nosed sheep jumps into the air on a snow-covered meadow", + "source": "AP", + "original_source": "AP/DPA", + "versioncreated": "2023-03-07T01:04:39+0000", + "firstcreated": "2023-01-28T00:00:00+0000", + "pubstatus": "usable", + "type": "picture", + "renditions": { + "original": { + "href": "http://localhost:5000/api/upload-raw/63d84c03b3fae2e407e0decd.jpg", + "media": "63d84c03b3fae2e407e0decd", + "mimetype": "image/jpeg", + "width": 4297, + "height": 2600, + }, + "baseImage": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0dee9?_schema=http", + "media": "63d84c08b3fae2e407e0dee9", + "mimetype": "image/jpeg", + "width": 1400, + "height": 847, + }, + "thumbnail": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0deeb?_schema=http", + "media": "63d84c08b3fae2e407e0deeb", + "mimetype": "image/jpeg", + "width": 198, + "height": 120, + }, + "viewImage": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0deed?_schema=http", + "media": "63d84c08b3fae2e407e0deed", + "mimetype": "image/jpeg", + "width": 640, + "height": 387, + }, + "4-3": { + "width": 800, + "height": 600, + "href": "http://localhost:5000/api/upload-raw/64068da64d05f5303c82458b.jpg", + "media": "64068da64d05f5303c82458b", + "mimetype": "image/jpeg", + }, + "16-9": { + "width": 1280, + "height": 720, + "href": "http://localhost:5000/api/upload-raw/64068da74d05f5303c82458e.jpg", + "media": "64068da74d05f5303c82458e", + "mimetype": "image/jpeg", + }, + }, + "state": "published", + "slugline": "Video Test", + "byline": "AP PHOTO", + "ednote": "GERMANY OUT; MANDATORY CREDIT", + "mimetype": "image/jpeg", + "unique_id": 41842, + "unique_name": "#41842", + "_current_version": 2, + "expiry": "2023-04-17T17:04:39+0000", + "operation": "publish", + "format": "HTML", + "priority": 6, + "urgency": 5, + "genre": [{"qcode": "Article", "name": "Article"}], + "language": "en", + "alt_text": "Germany Daily Life", + "subject": [ + { + "qcode": "05010004", + "name": "test/examination", + "parent": "05010000", + } + ], + "version": 2, + "_type": "archive", + "_latest_version": 2, + "anpa_category": [{"name": "Australian General News", "qcode": "a"}], + "abstract": "

Story abstract

", + }, + }, + "refs": [ + { + "key": "featuremedia", + "_id": "tag:localhost:2022:5e76af3a-4575-4108-a5bf-d0c5664422ee", + "uri": "20220911001701433030", + "guid": "tag:localhost:2022:5e76af3a-4575-4108-a5bf-d0c5664422ee", + "type": "picture", + "source": "AP", + }, + { + "key": "editor_0", + "_id": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "guid": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "type": "video", + "source": "AAP", + }, + { + "key": "editor_1", + "_id": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "uri": "20230128001757495826", + "guid": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "type": "picture", + "source": "AP", + }, + ], + "slugline": "Video Test", + "subject": [ + { + "qcode": "05010004", + "name": "test/examination", + "parent": "05010000", + } + ], + "abstract": "

Story abstract

", + "annotations": [], + "body_html": '

For example, TCP port 631 pened by cupsd process and cupsd only listing on the loopback address (127.0.0.1). Similarly, TCP port 22 opened by sshd process and sshd listing on all IP address for ssh connections:

\n\n
\n
\n\n

The ss command is used to dump socket statistics. It allows showing information similar to netstat. It can display more TCP and state information than other tools. The syntax is:

\n\n
\n Germany Daily Life\n
A Valais black-nosed sheep jumps into the air on a snow-covered meadow in Langenenslingen, Germany
\n
\n\n

In addition, to above commands one can use the nmap command which is an open source tool for network exploration and security auditing. We are going to use nmap to find and list open ports in Linux: 

', + "fields_meta": { + "headline": { + "draftjsState": [ + { + "blocks": [ + { + "key": "fm5hr", + "text": "Video test story headline", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {"MULTIPLE_HIGHLIGHTS": {}}, + } + ], + "entityMap": {}, + } + ] + }, + "abstract": { + "draftjsState": [ + { + "blocks": [ + { + "key": "2e0dd", + "text": "Story abstract", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {"MULTIPLE_HIGHLIGHTS": {}}, + } + ], + "entityMap": {}, + } + ] + }, + "body_html": { + "draftjsState": [ + { + "blocks": [ + { + "key": "svbd", + "text": "For example, TCP port 631 pened by cupsd process and cupsd only listing on the loopback address (127.0.0.1). Similarly, TCP port 22 opened by sshd process and sshd listing on all IP address for ssh connections:", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {"MULTIPLE_HIGHLIGHTS": {}}, + }, + { + "key": "4g2t1", + "text": " ", + "type": "atomic", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [{"offset": 0, "length": 1, "key": 0}], + "data": {}, + }, + { + "key": "794l3", + "text": "The ss command is used to dump socket statistics. It allows showing information similar to netstat. It can display more TCP and state information than other tools. The syntax is:", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {}, + }, + { + "key": "69db4", + "text": " ", + "type": "atomic", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [{"offset": 0, "length": 1, "key": 1}], + "data": {}, + }, + { + "key": "ellml", + "text": "In addition, to above commands one can use the nmap command which is an open source tool for network exploration and security auditing. We are going to use nmap to find and list open ports in Linux: ", + "type": "unstyled", + "depth": 0, + "inlineStyleRanges": [], + "entityRanges": [], + "data": {}, + }, + ], + "entityMap": { + "0": { + "type": "MEDIA", + "mutability": "MUTABLE", + "data": { + "media": { + "_id": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "media": "647fdaebae4a8a99d5d4eab5", + "type": "video", + "pubstatus": "usable", + "format": "HTML", + "firstcreated": "2023-06-07T01:18:32+0000", + "versioncreated": "2023-06-07T01:18:46+0000", + "guid": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "unique_id": 41881, + "unique_name": "#41881", + "family_id": "urn:newsml:localhost:2023-06-07T11:18:32.082856:eacdccc3-37fd-4ec1-9aed-5b1e8fb61f99", + "state": "in_progress", + "source": "AAP", + "priority": 6, + "urgency": 0, + "genre": [{"qcode": "Article", "name": "Article"}], + "dateline": { + "date": "2023-06-07T01:18:32+0000", + "source": "AAP", + "located": { + "city": "Wagga Wagga", + "state": "New South Wales", + "dateline": "city", + "tz": "Australia/Sydney", + "city_code": "Wagga Wagga", + "country_code": "AU", + "alt_name": "", + "country": "Australia", + "state_code": "NSW", + }, + "text": "WAGGA WAGGA, June 7 AAP -", + }, + "byline": "The Great Unwashed", + "place": [ + { + "country": "Australia", + "world_region": "Oceania", + "name": "QLD", + "state": "Queensland", + "group": "Australia", + "qcode": "QLD", + } + ], + "language": "en", + "operation": "update", + "renditions": { + "original": { + "href": "http://localhost:5000/api/upload-raw/647fdaebae4a8a99d5d4eab5.mp4", + "media": "647fdaebae4a8a99d5d4eab5", + "mimetype": "video/mp4", + } + }, + "mimetype": "video/mp4", + "alt_text": "Video Alt text", + "description_text": "Test video decsription", + "expiry": "2023-07-18T17:18:46+0000", + "headline": "Test video headline", + "version": 2, + "_type": "archive", + "_latest_version": 2, + } + }, + }, + "1": { + "type": "MEDIA", + "mutability": "MUTABLE", + "data": { + "media": { + "_id": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "guid": "tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", + "headline": "Germany Daily Life", + "description_text": "A Valais black-nosed sheep jumps into the air on a snow-covered meadow in Langenenslingen, Germany", + "archive_description": "A Valais black-nosed sheep jumps into the air on a snow-covered meadow in Langenenslingen, Germany, Saturday, Jan. 28, 2023. (Thomas Warnack/dpa via AP)", + "source": "AP", + "original_source": "AP/DPA", + "versioncreated": "2023-03-07T01:04:39+0000", + "firstcreated": "2023-01-28T00:00:00+0000", + "pubstatus": "usable", + "type": "picture", + "renditions": { + "original": { + "href": "http://localhost:5000/api/upload-raw/63d84c03b3fae2e407e0decd.jpg", + "media": "63d84c03b3fae2e407e0decd", + "mimetype": "image/jpeg", + "width": 4297, + "height": 2600, + }, + "baseImage": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0dee9?_schema=http", + "media": "63d84c08b3fae2e407e0dee9", + "mimetype": "image/jpeg", + "width": 1400, + "height": 847, + }, + "thumbnail": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0deeb?_schema=http", + "media": "63d84c08b3fae2e407e0deeb", + "mimetype": "image/jpeg", + "width": 198, + "height": 120, + }, + "viewImage": { + "href": "http://localhost:5000/api/upload-raw/63d84c08b3fae2e407e0deed?_schema=http", + "media": "63d84c08b3fae2e407e0deed", + "mimetype": "image/jpeg", + "width": 640, + "height": 387, + }, + "4-3": { + "width": 800, + "height": 600, + "href": "http://localhost:5000/api/upload-raw/64068da64d05f5303c82458b.jpg", + "media": "64068da64d05f5303c82458b", + "mimetype": "image/jpeg", + }, + "16-9": { + "width": 1280, + "height": 720, + "href": "http://localhost:5000/api/upload-raw/64068da74d05f5303c82458e.jpg", + "media": "64068da74d05f5303c82458e", + "mimetype": "image/jpeg", + }, + }, + "state": "in_progress", + "slugline": "Germany Daily Life", + "byline": "AP PHOTO", + "ednote": "GERMANY OUT; MANDATORY CREDIT", + "mimetype": "image/jpeg", + "uri": "20230128001757495826", + "_current_version": 2, + "expiry": "2023-04-17T17:04:39+0000", + "operation": "update", + "format": "HTML", + "priority": 6, + "urgency": None, + "genre": [{"qcode": "Article", "name": "Article"}], + "language": "en", + "alt_text": "Germany Daily Life", + "subject": [ + { + "qcode": "01000000", + "name": "arts, culture and entertainment", + } + ], + "version": 2, + "_type": "archive", + "_latest_version": 2, + } + }, + }, + }, + } + ] + }, + }, + "headline": "Video test story headline", + "word_count": 121, + "firstpublished": "2023-06-07T01:31:31.000Z", + } + seq, doc = self.formatter.format( + item, + { + "name": "Test Subscriber", + "destinations": [ + { + "config": { + "host": "ftp.abc.com", + "path": "/stories", + "associated_path": "/pictures", + "push_associated": True, + "include_original": True, + } + } + ], + "products": [1], + }, + )[0] + self.assertIn("editor_0", doc) + self.assertNotIn("editor_1", doc) + self.assertNotIn("tag:localhost:2023:4f7e5665-84e3-4cef-9181-addf9bf8474c", doc) + self.assertIn("featuremedia", doc)